]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_compressor_backing_store.c
8f6971fb4ae17b1d24823074df1ab8accc5a338a
[apple/xnu.git] / osfmk / vm / vm_compressor_backing_store.c
1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32
33 #include <IOKit/IOHibernatePrivate.h>
34
35 #include <kern/policy_internal.h>
36
37 boolean_t compressor_store_stop_compaction = FALSE;
38 boolean_t vm_swapfile_create_needed = FALSE;
39 boolean_t vm_swapfile_gc_needed = FALSE;
40
41 int vm_swapper_throttle = -1;
42 uint64_t vm_swapout_thread_id;
43
44 uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
45 uint64_t vm_swap_get_failures = 0; /* Fatal */
46 uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
47 int vm_num_swap_files_config = 0;
48 int vm_num_swap_files = 0;
49 int vm_num_pinned_swap_files = 0;
50 int vm_swapout_thread_processed_segments = 0;
51 int vm_swapout_thread_awakened = 0;
52 int vm_swapfile_create_thread_awakened = 0;
53 int vm_swapfile_create_thread_running = 0;
54 int vm_swapfile_gc_thread_awakened = 0;
55 int vm_swapfile_gc_thread_running = 0;
56
57 int64_t vm_swappin_avail = 0;
58 boolean_t vm_swappin_enabled = FALSE;
59 unsigned int vm_swapfile_total_segs_alloced = 0;
60 unsigned int vm_swapfile_total_segs_used = 0;
61
62 char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
63
64 extern vm_map_t compressor_map;
65
66
67 #define SWAP_READY 0x1 /* Swap file is ready to be used */
68 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
69 #define SWAP_WANTED 0x4 /* Swap file has waiters */
70 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
71 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
72
73
74 struct swapfile {
75 queue_head_t swp_queue; /* list of swap files */
76 char *swp_path; /* saved pathname of swap file */
77 struct vnode *swp_vp; /* backing vnode */
78 uint64_t swp_size; /* size of this swap file */
79 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
80 unsigned int swp_pathlen; /* length of pathname */
81 unsigned int swp_nsegs; /* #segments we can use */
82 unsigned int swp_nseginuse; /* #segments in use */
83 unsigned int swp_index; /* index of this swap file */
84 unsigned int swp_flags; /* state of swap file */
85 unsigned int swp_free_hint; /* offset of 1st free chunk */
86 unsigned int swp_io_count; /* count of outstanding I/Os */
87 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
88
89 struct trim_list *swp_delayed_trim_list_head;
90 unsigned int swp_delayed_trim_count;
91 };
92
93 queue_head_t swf_global_queue;
94 boolean_t swp_trim_supported = FALSE;
95
96 extern clock_sec_t dont_trim_until_ts;
97 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
98 clock_sec_t vm_swapfile_last_successful_create_ts = 0;
99 int vm_swapfile_can_be_created = FALSE;
100 boolean_t delayed_trim_handling_in_progress = FALSE;
101
102 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
103
104 static void vm_swapout_thread_throttle_adjust(void);
105 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
106 static void vm_swapout_thread(void);
107 static void vm_swapfile_create_thread(void);
108 static void vm_swapfile_gc_thread(void);
109 static void vm_swap_defragment(void);
110 static void vm_swap_handle_delayed_trims(boolean_t);
111 static void vm_swap_do_delayed_trim(struct swapfile *);
112 static void vm_swap_wait_on_trim_handling_in_progress(void);
113
114
115 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
116
/*
 * Swap policy knobs.  The limits and the defragment/pin predicates differ
 * between CONFIG_EMBEDDED and other configurations; the create/trim
 * predicates are the same for both and are defined once below.
 */
#if CONFIG_EMBEDDED

#if DEVELOPMENT || DEBUG
#define VM_MAX_SWAP_FILE_NUM            100
#else /* DEVELOPMENT || DEBUG */
#define VM_MAX_SWAP_FILE_NUM            5
#endif /* DEVELOPMENT || DEBUG */

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE

#else /* CONFIG_EMBEDDED */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))

#endif /* CONFIG_EMBEDDED */

/*
 * Create another swap file when we are still below the configured number of
 * files, the count of allocated-but-unused segments is below
 * VM_SWAPFILE_HIWATER_SEGS, and more than VM_SWAPFILE_DELAYED_CREATE seconds
 * have passed since the last failed creation attempt.
 */
#define VM_SWAP_SHOULD_CREATE(cur_ts)   (((vm_num_swap_files < vm_num_swap_files_config) && \
	                                 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
	                                 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE)) ? 1 : 0)

/*
 * A swap file's delayed trims are due once its pending count reaches the
 * batch limit.  The argument is parenthesized so any pointer expression
 * (e.g. &file) can be passed.
 */
#define VM_SWAP_SHOULD_TRIM(swf)        (((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
145
146 #define VM_SWAP_SHOULD_RECLAIM() (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
147 #define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)
148 #define VM_SWAPFILE_DELAYED_CREATE 15
149
150 #define VM_SWAP_BUSY() ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
151
152
153 #if CHECKSUM_THE_SWAP
154 extern unsigned int hash_string(char *cp, int len);
155 #endif
156
157 #if RECORD_THE_COMPRESSED_DATA
158 boolean_t c_compressed_record_init_done = FALSE;
159 int c_compressed_record_write_error = 0;
160 struct vnode *c_compressed_record_vp = NULL;
161 uint64_t c_compressed_record_file_offset = 0;
162 void c_compressed_record_init(void);
163 void c_compressed_record_write(char *, int);
164 #endif
165
166 extern void vm_pageout_io_throttle(void);
167
168 static struct swapfile *vm_swapfile_for_handle(uint64_t);
169
170 /*
171 * Called with the vm_swap_data_lock held.
172 */
173
174 static struct swapfile *
175 vm_swapfile_for_handle(uint64_t f_offset)
176 {
177 uint64_t file_offset = 0;
178 unsigned int swapfile_index = 0;
179 struct swapfile* swf = NULL;
180
181 file_offset = (f_offset & SWAP_SLOT_MASK);
182 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
183
184 swf = (struct swapfile*) queue_first(&swf_global_queue);
185
186 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
187 if (swapfile_index == swf->swp_index) {
188 break;
189 }
190
191 swf = (struct swapfile*) queue_next(&swf->swp_queue);
192 }
193
194 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
195 swf = NULL;
196 }
197
198 return swf;
199 }
200
201 #if ENCRYPTED_SWAP
202
203 #include <libkern/crypto/aesxts.h>
204
205 extern int cc_rand_generate(void *, size_t); /* from <libkern/crypto/rand.h> */
206
207 boolean_t swap_crypt_initialized;
208 void swap_crypt_initialize(void);
209
210 symmetric_xts xts_modectx;
211 uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
212 uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
213
214 #if DEVELOPMENT || DEBUG
215 boolean_t swap_crypt_xts_tested = FALSE;
216 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
217 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
218 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
219 #endif /* DEVELOPMENT || DEBUG */
220
221 unsigned long vm_page_encrypt_counter;
222 unsigned long vm_page_decrypt_counter;
223
224
225 void
226 swap_crypt_initialize(void)
227 {
228 uint8_t *enckey1, *enckey2;
229 int keylen1, keylen2;
230 int error;
231
232 assert(swap_crypt_initialized == FALSE);
233
234 keylen1 = sizeof(swap_crypt_key1);
235 enckey1 = (uint8_t *)&swap_crypt_key1;
236 keylen2 = sizeof(swap_crypt_key2);
237 enckey2 = (uint8_t *)&swap_crypt_key2;
238
239 error = cc_rand_generate((void *)enckey1, keylen1);
240 assert(!error);
241
242 error = cc_rand_generate((void *)enckey2, keylen2);
243 assert(!error);
244
245 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
246 assert(!error);
247
248 swap_crypt_initialized = TRUE;
249
250 #if DEVELOPMENT || DEBUG
251 uint8_t *encptr;
252 uint8_t *decptr;
253 uint8_t *refptr;
254 uint8_t *iv;
255 uint64_t ivnum[2];
256 int size = 0;
257 int i = 0;
258 int rc = 0;
259
260 assert(swap_crypt_xts_tested == FALSE);
261
262 /*
263 * Validate the encryption algorithms.
264 *
265 * First initialize the test data.
266 */
267 for (i = 0; i < 4096; i++) {
268 swap_crypt_test_page_ref[i] = (char) i;
269 }
270 ivnum[0] = (uint64_t)0xaa;
271 ivnum[1] = 0;
272 iv = (uint8_t *)ivnum;
273
274 refptr = (uint8_t *)swap_crypt_test_page_ref;
275 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
276 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
277 size = 4096;
278
279 /* encrypt */
280 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
281 assert(!rc);
282
283 /* compare result with original - should NOT match */
284 for (i = 0; i < 4096; i++) {
285 if (swap_crypt_test_page_encrypt[i] !=
286 swap_crypt_test_page_ref[i]) {
287 break;
288 }
289 }
290 assert(i != 4096);
291
292 /* decrypt */
293 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
294 assert(!rc);
295
296 /* compare result with original */
297 for (i = 0; i < 4096; i++) {
298 if (swap_crypt_test_page_decrypt[i] !=
299 swap_crypt_test_page_ref[i]) {
300 panic("encryption test failed");
301 }
302 }
303 /* encrypt in place */
304 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
305 assert(!rc);
306
307 /* decrypt in place */
308 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
309 assert(!rc);
310
311 for (i = 0; i < 4096; i++) {
312 if (swap_crypt_test_page_decrypt[i] !=
313 swap_crypt_test_page_ref[i]) {
314 panic("in place encryption test failed");
315 }
316 }
317 swap_crypt_xts_tested = TRUE;
318 #endif /* DEVELOPMENT || DEBUG */
319 }
320
321
322 void
323 vm_swap_encrypt(c_segment_t c_seg)
324 {
325 uint8_t *ptr;
326 uint8_t *iv;
327 uint64_t ivnum[2];
328 int size = 0;
329 int rc = 0;
330
331 if (swap_crypt_initialized == FALSE) {
332 swap_crypt_initialize();
333 }
334
335 #if DEVELOPMENT || DEBUG
336 C_SEG_MAKE_WRITEABLE(c_seg);
337 #endif
338 ptr = (uint8_t *)c_seg->c_store.c_buffer;
339 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
340
341 ivnum[0] = (uint64_t)c_seg;
342 ivnum[1] = 0;
343 iv = (uint8_t *)ivnum;
344
345 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
346 assert(!rc);
347
348 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
349
350 #if DEVELOPMENT || DEBUG
351 C_SEG_WRITE_PROTECT(c_seg);
352 #endif
353 }
354
355 void
356 vm_swap_decrypt(c_segment_t c_seg)
357 {
358 uint8_t *ptr;
359 uint8_t *iv;
360 uint64_t ivnum[2];
361 int size = 0;
362 int rc = 0;
363
364 assert(swap_crypt_initialized);
365
366 #if DEVELOPMENT || DEBUG
367 C_SEG_MAKE_WRITEABLE(c_seg);
368 #endif
369 ptr = (uint8_t *)c_seg->c_store.c_buffer;
370 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
371
372 ivnum[0] = (uint64_t)c_seg;
373 ivnum[1] = 0;
374 iv = (uint8_t *)ivnum;
375
376 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
377 assert(!rc);
378
379 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
380
381 #if DEVELOPMENT || DEBUG
382 C_SEG_WRITE_PROTECT(c_seg);
383 #endif
384 }
385 #endif /* ENCRYPTED_SWAP */
386
387
388 void
389 vm_compressor_swap_init()
390 {
391 thread_t thread = NULL;
392
393 lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
394 lck_grp_init(&vm_swap_data_lock_grp,
395 "vm_swap_data",
396 &vm_swap_data_lock_grp_attr);
397 lck_attr_setdefault(&vm_swap_data_lock_attr);
398 lck_mtx_init_ext(&vm_swap_data_lock,
399 &vm_swap_data_lock_ext,
400 &vm_swap_data_lock_grp,
401 &vm_swap_data_lock_attr);
402
403 queue_init(&swf_global_queue);
404
405
406 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
407 BASEPRI_VM, &thread) != KERN_SUCCESS) {
408 panic("vm_swapout_thread: create failed");
409 }
410 thread_set_thread_name(thread, "VM_swapout");
411 vm_swapout_thread_id = thread->thread_id;
412
413 thread_deallocate(thread);
414
415 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
416 BASEPRI_VM, &thread) != KERN_SUCCESS) {
417 panic("vm_swapfile_create_thread: create failed");
418 }
419
420 thread_set_thread_name(thread, "VM_swapfile_create");
421 thread_deallocate(thread);
422
423 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
424 BASEPRI_VM, &thread) != KERN_SUCCESS) {
425 panic("vm_swapfile_gc_thread: create failed");
426 }
427 thread_set_thread_name(thread, "VM_swapfile_gc");
428 thread_deallocate(thread);
429
430 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
431 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
432 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
433 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
434
435 #if CONFIG_EMBEDDED
436 /*
437 * dummy value until the swap file gets created
438 * when we drive the first c_segment_t to the
439 * swapout queue... at that time we will
440 * know the true size we have to work with
441 */
442 c_overage_swapped_limit = 16;
443 #endif
444
445 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
446
447 printf("VM Swap Subsystem is ON\n");
448 }
449
450
451 #if RECORD_THE_COMPRESSED_DATA
452
453 void
454 c_compressed_record_init()
455 {
456 if (c_compressed_record_init_done == FALSE) {
457 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
458 c_compressed_record_init_done = TRUE;
459 }
460 }
461
462 void
463 c_compressed_record_write(char *buf, int size)
464 {
465 if (c_compressed_record_write_error == 0) {
466 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
467 c_compressed_record_file_offset += size;
468 }
469 }
470 #endif
471
472
473 int compaction_swapper_inited = 0;
474
475 void
476 vm_compaction_swapper_do_init(void)
477 {
478 struct vnode *vp;
479 char *pathname;
480 int namelen;
481
482 if (compaction_swapper_inited) {
483 return;
484 }
485
486 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
487 compaction_swapper_inited = 1;
488 return;
489 }
490 lck_mtx_lock(&vm_swap_data_lock);
491
492 if (!compaction_swapper_inited) {
493 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
494 pathname = (char*)kalloc(namelen);
495 memset(pathname, 0, namelen);
496 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
497
498 vm_swapfile_open(pathname, &vp);
499
500 if (vp) {
501 if (vnode_pager_isSSD(vp) == FALSE) {
502 /*
503 * swap files live on an HDD, so let's make sure to start swapping
504 * much earlier since we're not worried about SSD write-wear and
505 * we have so little write bandwidth to work with
506 * these values were derived expermentially by running the performance
507 * teams stock test for evaluating HDD performance against various
508 * combinations and looking and comparing overall results.
509 * Note that the > relationship between these 4 values must be maintained
510 */
511 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
512 vm_compressor_minorcompact_threshold_divisor = 15;
513 }
514 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
515 vm_compressor_majorcompact_threshold_divisor = 18;
516 }
517 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
518 vm_compressor_unthrottle_threshold_divisor = 24;
519 }
520 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
521 vm_compressor_catchup_threshold_divisor = 30;
522 }
523 }
524 #if !CONFIG_EMBEDDED
525 vnode_setswapmount(vp);
526 vm_swappin_avail = vnode_getswappin_avail(vp);
527
528 if (vm_swappin_avail) {
529 vm_swappin_enabled = TRUE;
530 }
531 #endif
532 vm_swapfile_close((uint64_t)pathname, vp);
533 }
534 kfree(pathname, namelen);
535
536 compaction_swapper_inited = 1;
537 }
538 lck_mtx_unlock(&vm_swap_data_lock);
539 }
540
541
542 void
543 vm_swap_consider_defragmenting(int flags)
544 {
545 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
546 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
547
548 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
549 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
550 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
551 lck_mtx_lock(&vm_swap_data_lock);
552
553 if (force_defrag) {
554 vm_swap_force_defrag = TRUE;
555 }
556
557 if (force_reclaim) {
558 vm_swap_force_reclaim = TRUE;
559 }
560
561 if (!vm_swapfile_gc_thread_running) {
562 thread_wakeup((event_t) &vm_swapfile_gc_needed);
563 }
564
565 lck_mtx_unlock(&vm_swap_data_lock);
566 }
567 }
568 }
569
570
571 int vm_swap_defragment_yielded = 0;
572 int vm_swap_defragment_swapin = 0;
573 int vm_swap_defragment_free = 0;
574 int vm_swap_defragment_busy = 0;
575
576
577 static void
578 vm_swap_defragment()
579 {
580 c_segment_t c_seg;
581
582 /*
583 * have to grab the master lock w/o holding
584 * any locks in spin mode
585 */
586 PAGE_REPLACEMENT_DISALLOWED(TRUE);
587
588 lck_mtx_lock_spin_always(c_list_lock);
589
590 while (!queue_empty(&c_swappedout_sparse_list_head)) {
591 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
592 vm_swap_defragment_yielded++;
593 break;
594 }
595 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
596
597 lck_mtx_lock_spin_always(&c_seg->c_lock);
598
599 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
600
601 if (c_seg->c_busy) {
602 lck_mtx_unlock_always(c_list_lock);
603
604 PAGE_REPLACEMENT_DISALLOWED(FALSE);
605 /*
606 * c_seg_wait_on_busy consumes c_seg->c_lock
607 */
608 c_seg_wait_on_busy(c_seg);
609
610 PAGE_REPLACEMENT_DISALLOWED(TRUE);
611
612 lck_mtx_lock_spin_always(c_list_lock);
613
614 vm_swap_defragment_busy++;
615 continue;
616 }
617 if (c_seg->c_bytes_used == 0) {
618 /*
619 * c_seg_free_locked consumes the c_list_lock
620 * and c_seg->c_lock
621 */
622 C_SEG_BUSY(c_seg);
623 c_seg_free_locked(c_seg);
624
625 vm_swap_defragment_free++;
626 } else {
627 lck_mtx_unlock_always(c_list_lock);
628
629 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
630 lck_mtx_unlock_always(&c_seg->c_lock);
631 }
632
633 vm_swap_defragment_swapin++;
634 }
635 PAGE_REPLACEMENT_DISALLOWED(FALSE);
636
637 vm_pageout_io_throttle();
638
639 /*
640 * because write waiters have privilege over readers,
641 * dropping and immediately retaking the master lock will
642 * still allow any thread waiting to acquire the
643 * master lock exclusively an opportunity to take it
644 */
645 PAGE_REPLACEMENT_DISALLOWED(TRUE);
646
647 lck_mtx_lock_spin_always(c_list_lock);
648 }
649 lck_mtx_unlock_always(c_list_lock);
650
651 PAGE_REPLACEMENT_DISALLOWED(FALSE);
652 }
653
654
655
656 static void
657 vm_swapfile_create_thread(void)
658 {
659 clock_sec_t sec;
660 clock_nsec_t nsec;
661
662 current_thread()->options |= TH_OPT_VMPRIV;
663
664 vm_swapfile_create_thread_awakened++;
665 vm_swapfile_create_thread_running = 1;
666
667 while (TRUE) {
668 /*
669 * walk through the list of swap files
670 * and do the delayed frees/trims for
671 * any swap file whose count of delayed
672 * frees is above the batch limit
673 */
674 vm_swap_handle_delayed_trims(FALSE);
675
676 lck_mtx_lock(&vm_swap_data_lock);
677
678 if (hibernate_in_progress_with_pinned_swap == TRUE) {
679 break;
680 }
681
682 clock_get_system_nanotime(&sec, &nsec);
683
684 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
685 break;
686 }
687
688 lck_mtx_unlock(&vm_swap_data_lock);
689
690 if (vm_swap_create_file() == FALSE) {
691 vm_swapfile_last_failed_to_create_ts = sec;
692 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
693 } else {
694 vm_swapfile_last_successful_create_ts = sec;
695 }
696 }
697 vm_swapfile_create_thread_running = 0;
698
699 if (hibernate_in_progress_with_pinned_swap == TRUE) {
700 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
701 }
702
703 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
704
705 lck_mtx_unlock(&vm_swap_data_lock);
706
707 thread_block((thread_continue_t)vm_swapfile_create_thread);
708
709 /* NOTREACHED */
710 }
711
712
713 #if HIBERNATION
714
715 kern_return_t
716 hibernate_pin_swap(boolean_t start)
717 {
718 vm_compaction_swapper_do_init();
719
720 if (start == FALSE) {
721 lck_mtx_lock(&vm_swap_data_lock);
722 hibernate_in_progress_with_pinned_swap = FALSE;
723 lck_mtx_unlock(&vm_swap_data_lock);
724
725 return KERN_SUCCESS;
726 }
727 if (vm_swappin_enabled == FALSE) {
728 return KERN_SUCCESS;
729 }
730
731 lck_mtx_lock(&vm_swap_data_lock);
732
733 hibernate_in_progress_with_pinned_swap = TRUE;
734
735 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
736 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
737
738 lck_mtx_unlock(&vm_swap_data_lock);
739
740 thread_block(THREAD_CONTINUE_NULL);
741
742 lck_mtx_lock(&vm_swap_data_lock);
743 }
744 if (vm_num_swap_files > vm_num_pinned_swap_files) {
745 hibernate_in_progress_with_pinned_swap = FALSE;
746 lck_mtx_unlock(&vm_swap_data_lock);
747
748 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
749 vm_num_swap_files, vm_num_pinned_swap_files);
750 return KERN_FAILURE;
751 }
752 lck_mtx_unlock(&vm_swap_data_lock);
753
754 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
755 if (vm_swap_create_file() == FALSE) {
756 break;
757 }
758 }
759 return KERN_SUCCESS;
760 }
761 #endif
762
763 static void
764 vm_swapfile_gc_thread(void)
765 {
766 boolean_t need_defragment;
767 boolean_t need_reclaim;
768
769 vm_swapfile_gc_thread_awakened++;
770 vm_swapfile_gc_thread_running = 1;
771
772 while (TRUE) {
773 lck_mtx_lock(&vm_swap_data_lock);
774
775 if (hibernate_in_progress_with_pinned_swap == TRUE) {
776 break;
777 }
778
779 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
780 break;
781 }
782
783 need_defragment = FALSE;
784 need_reclaim = FALSE;
785
786 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
787 need_defragment = TRUE;
788 }
789
790 if (VM_SWAP_SHOULD_RECLAIM()) {
791 need_defragment = TRUE;
792 need_reclaim = TRUE;
793 }
794 if (need_defragment == FALSE && need_reclaim == FALSE) {
795 break;
796 }
797
798 vm_swap_force_defrag = FALSE;
799 vm_swap_force_reclaim = FALSE;
800
801 lck_mtx_unlock(&vm_swap_data_lock);
802
803 if (need_defragment == TRUE) {
804 vm_swap_defragment();
805 }
806 if (need_reclaim == TRUE) {
807 vm_swap_reclaim();
808 }
809 }
810 vm_swapfile_gc_thread_running = 0;
811
812 if (hibernate_in_progress_with_pinned_swap == TRUE) {
813 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
814 }
815
816 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
817
818 lck_mtx_unlock(&vm_swap_data_lock);
819
820 thread_block((thread_continue_t)vm_swapfile_gc_thread);
821
822 /* NOTREACHED */
823 }
824
825
826
827 #define VM_SWAPOUT_LIMIT_T2P 4
828 #define VM_SWAPOUT_LIMIT_T1P 4
829 #define VM_SWAPOUT_LIMIT_T0P 6
830 #define VM_SWAPOUT_LIMIT_T0 8
831 #define VM_SWAPOUT_LIMIT_MAX 8
832
833 #define VM_SWAPOUT_START 0
834 #define VM_SWAPOUT_T2_PASSIVE 1
835 #define VM_SWAPOUT_T1_PASSIVE 2
836 #define VM_SWAPOUT_T0_PASSIVE 3
837 #define VM_SWAPOUT_T0 4
838
839 int vm_swapout_state = VM_SWAPOUT_START;
840 int vm_swapout_limit = 1;
841
842 int vm_swapper_entered_T0 = 0;
843 int vm_swapper_entered_T0P = 0;
844 int vm_swapper_entered_T1P = 0;
845 int vm_swapper_entered_T2P = 0;
846
847
848 static void
849 vm_swapout_thread_throttle_adjust(void)
850 {
851 switch (vm_swapout_state) {
852 case VM_SWAPOUT_START:
853
854 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
855 vm_swapper_entered_T2P++;
856
857 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
858 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
859 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
860 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
861 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
862 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
863
864 break;
865
866 case VM_SWAPOUT_T2_PASSIVE:
867
868 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
869 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
870 vm_swapper_entered_T0P++;
871
872 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
873 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
874 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
875 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
876 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
877 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
878
879 break;
880 }
881 if (swapout_target_age || hibernate_flushing == TRUE) {
882 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
883 vm_swapper_entered_T1P++;
884
885 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
886 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
887 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
888 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
889 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
890 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
891 }
892 break;
893
894 case VM_SWAPOUT_T1_PASSIVE:
895
896 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
897 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
898 vm_swapper_entered_T0P++;
899
900 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
901 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
902 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
903 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
904 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
905 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
906
907 break;
908 }
909 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
910 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
911 vm_swapper_entered_T2P++;
912
913 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
914 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
915 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
916 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
917 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
918 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
919 }
920 break;
921
922 case VM_SWAPOUT_T0_PASSIVE:
923
924 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
925 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
926 vm_swapper_entered_T2P++;
927
928 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
929 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
930 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
931 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
932 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
933 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
934
935 break;
936 }
937 if (SWAPPER_NEEDS_TO_CATCHUP()) {
938 vm_swapper_entered_T0++;
939
940 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
941 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
942 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
943 vm_swapout_state = VM_SWAPOUT_T0;
944 }
945 break;
946
947 case VM_SWAPOUT_T0:
948
949 if (SWAPPER_HAS_CAUGHTUP()) {
950 vm_swapper_entered_T0P++;
951
952 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
953 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
954 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
955 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
956 }
957 break;
958 }
959 }
960
961 int vm_swapout_found_empty = 0;
962
963 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
964
965 int vm_swapout_soc_busy = 0;
966 int vm_swapout_soc_done = 0;
967
968
969 static struct swapout_io_completion *
970 vm_swapout_find_free_soc(void)
971 {
972 int i;
973
974 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
975 if (vm_swapout_ctx[i].swp_io_busy == 0) {
976 return &vm_swapout_ctx[i];
977 }
978 }
979 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
980
981 return NULL;
982 }
983
984 static struct swapout_io_completion *
985 vm_swapout_find_done_soc(void)
986 {
987 int i;
988
989 if (vm_swapout_soc_done) {
990 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
991 if (vm_swapout_ctx[i].swp_io_done) {
992 return &vm_swapout_ctx[i];
993 }
994 }
995 }
996 return NULL;
997 }
998
999 static void
1000 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1001 {
1002 kern_return_t kr;
1003
1004 if (soc->swp_io_error) {
1005 kr = KERN_FAILURE;
1006 } else {
1007 kr = KERN_SUCCESS;
1008 }
1009
1010 lck_mtx_unlock_always(c_list_lock);
1011
1012 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error);
1013 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1014
1015 lck_mtx_lock_spin_always(c_list_lock);
1016
1017 soc->swp_io_done = 0;
1018 soc->swp_io_busy = 0;
1019
1020 vm_swapout_soc_busy--;
1021 vm_swapout_soc_done--;
1022 }
1023
1024
1025 static void
1026 vm_swapout_thread(void)
1027 {
1028 uint32_t size = 0;
1029 c_segment_t c_seg = NULL;
1030 kern_return_t kr = KERN_SUCCESS;
1031 struct swapout_io_completion *soc;
1032
1033 current_thread()->options |= TH_OPT_VMPRIV;
1034
1035 vm_swapout_thread_awakened++;
1036
1037 lck_mtx_lock_spin_always(c_list_lock);
1038 again:
1039 while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit) {
1040 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
1041
1042 lck_mtx_lock_spin_always(&c_seg->c_lock);
1043
1044 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1045
1046 if (c_seg->c_busy) {
1047 lck_mtx_unlock_always(c_list_lock);
1048
1049 c_seg_wait_on_busy(c_seg);
1050
1051 lck_mtx_lock_spin_always(c_list_lock);
1052
1053 continue;
1054 }
1055 vm_swapout_thread_processed_segments++;
1056
1057 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1058
1059 if (size == 0) {
1060 assert(c_seg->c_bytes_used == 0);
1061
1062 if (!c_seg->c_on_minorcompact_q) {
1063 c_seg_need_delayed_compaction(c_seg, TRUE);
1064 }
1065
1066 c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
1067 lck_mtx_unlock_always(&c_seg->c_lock);
1068 lck_mtx_unlock_always(c_list_lock);
1069
1070 vm_swapout_found_empty++;
1071 goto c_seg_is_empty;
1072 }
1073 C_SEG_BUSY(c_seg);
1074 c_seg->c_busy_swapping = 1;
1075
1076 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1077
1078 lck_mtx_unlock_always(c_list_lock);
1079 lck_mtx_unlock_always(&c_seg->c_lock);
1080
1081 #if CHECKSUM_THE_SWAP
1082 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1083 c_seg->cseg_swap_size = size;
1084 #endif /* CHECKSUM_THE_SWAP */
1085
1086 #if ENCRYPTED_SWAP
1087 vm_swap_encrypt(c_seg);
1088 #endif /* ENCRYPTED_SWAP */
1089
1090 soc = vm_swapout_find_free_soc();
1091 assert(soc);
1092
1093 soc->swp_upl_ctx.io_context = (void *)soc;
1094 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1095 soc->swp_upl_ctx.io_error = 0;
1096
1097 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1098
1099 if (kr != KERN_SUCCESS) {
1100 if (soc->swp_io_done) {
1101 lck_mtx_lock_spin_always(c_list_lock);
1102
1103 soc->swp_io_done = 0;
1104 vm_swapout_soc_done--;
1105
1106 lck_mtx_unlock_always(c_list_lock);
1107 }
1108 vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1109 } else {
1110 soc->swp_io_busy = 1;
1111 vm_swapout_soc_busy++;
1112 }
1113 vm_swapout_thread_throttle_adjust();
1114 vm_pageout_io_throttle();
1115
1116 c_seg_is_empty:
1117 if (c_swapout_count == 0) {
1118 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1119 }
1120
1121 lck_mtx_lock_spin_always(c_list_lock);
1122
1123 if ((soc = vm_swapout_find_done_soc())) {
1124 vm_swapout_complete_soc(soc);
1125 }
1126 }
1127 if ((soc = vm_swapout_find_done_soc())) {
1128 vm_swapout_complete_soc(soc);
1129 goto again;
1130 }
1131 assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
1132
1133 lck_mtx_unlock_always(c_list_lock);
1134
1135 thread_block((thread_continue_t)vm_swapout_thread);
1136
1137 /* NOTREACHED */
1138 }
1139
1140
1141 void
1142 vm_swapout_iodone(void *io_context, int error)
1143 {
1144 struct swapout_io_completion *soc;
1145
1146 soc = (struct swapout_io_completion *)io_context;
1147
1148 lck_mtx_lock_spin_always(c_list_lock);
1149
1150 soc->swp_io_done = 1;
1151 soc->swp_io_error = error;
1152 vm_swapout_soc_done++;
1153
1154 thread_wakeup((event_t)&c_swapout_list_head);
1155
1156 lck_mtx_unlock_always(c_list_lock);
1157 }
1158
1159
1160 static void
1161 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1162 {
1163 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1164
1165 if (kr == KERN_SUCCESS) {
1166 kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size, KMA_COMPRESSOR);
1167 }
1168 #if ENCRYPTED_SWAP
1169 else {
1170 vm_swap_decrypt(c_seg);
1171 }
1172 #endif /* ENCRYPTED_SWAP */
1173 lck_mtx_lock_spin_always(c_list_lock);
1174 lck_mtx_lock_spin_always(&c_seg->c_lock);
1175
1176 if (kr == KERN_SUCCESS) {
1177 int new_state = C_ON_SWAPPEDOUT_Q;
1178 boolean_t insert_head = FALSE;
1179
1180 if (hibernate_flushing == TRUE) {
1181 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1182 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1183 insert_head = TRUE;
1184 }
1185 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1186 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1187 }
1188
1189 c_seg_switch_state(c_seg, new_state, insert_head);
1190
1191 c_seg->c_store.c_swap_handle = f_offset;
1192
1193 VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
1194
1195 if (c_seg->c_bytes_used) {
1196 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1197 }
1198 } else {
1199 if (c_seg->c_overage_swap == TRUE) {
1200 c_seg->c_overage_swap = FALSE;
1201 c_overage_swapped_count--;
1202 }
1203 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1204
1205 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1206 c_seg_need_delayed_compaction(c_seg, TRUE);
1207 }
1208 }
1209 assert(c_seg->c_busy_swapping);
1210 assert(c_seg->c_busy);
1211
1212 c_seg->c_busy_swapping = 0;
1213 lck_mtx_unlock_always(c_list_lock);
1214
1215 C_SEG_WAKEUP_DONE(c_seg);
1216 lck_mtx_unlock_always(&c_seg->c_lock);
1217
1218 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1219 }
1220
1221
1222 boolean_t
1223 vm_swap_create_file()
1224 {
1225 uint64_t size = 0;
1226 int namelen = 0;
1227 boolean_t swap_file_created = FALSE;
1228 boolean_t swap_file_reuse = FALSE;
1229 boolean_t swap_file_pin = FALSE;
1230 struct swapfile *swf = NULL;
1231
1232 /*
1233 * make sure we've got all the info we need
1234 * to potentially pin a swap file... we could
1235 * be swapping out due to hibernation w/o ever
1236 * having run vm_pageout_scan, which is normally
1237 * the trigger to do the init
1238 */
1239 vm_compaction_swapper_do_init();
1240
1241 /*
1242 * Any swapfile structure ready for re-use?
1243 */
1244
1245 lck_mtx_lock(&vm_swap_data_lock);
1246
1247 swf = (struct swapfile*) queue_first(&swf_global_queue);
1248
1249 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1250 if (swf->swp_flags == SWAP_REUSE) {
1251 swap_file_reuse = TRUE;
1252 break;
1253 }
1254 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1255 }
1256
1257 lck_mtx_unlock(&vm_swap_data_lock);
1258
1259 if (swap_file_reuse == FALSE) {
1260 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1261
1262 swf = (struct swapfile*) kalloc(sizeof *swf);
1263 memset(swf, 0, sizeof(*swf));
1264
1265 swf->swp_index = vm_num_swap_files + 1;
1266 swf->swp_pathlen = namelen;
1267 swf->swp_path = (char*)kalloc(swf->swp_pathlen);
1268
1269 memset(swf->swp_path, 0, namelen);
1270
1271 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1272 }
1273
1274 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1275
1276 if (swf->swp_vp == NULL) {
1277 if (swap_file_reuse == FALSE) {
1278 kfree(swf->swp_path, swf->swp_pathlen);
1279 kfree(swf, sizeof *swf);
1280 }
1281 return FALSE;
1282 }
1283 vm_swapfile_can_be_created = TRUE;
1284
1285 size = MAX_SWAP_FILE_SIZE;
1286
1287 while (size >= MIN_SWAP_FILE_SIZE) {
1288 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1289
1290 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1291 int num_bytes_for_bitmap = 0;
1292
1293 swap_file_created = TRUE;
1294
1295 swf->swp_size = size;
1296 swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
1297 swf->swp_nseginuse = 0;
1298 swf->swp_free_hint = 0;
1299
1300 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1301 /*
1302 * Allocate a bitmap that describes the
1303 * number of segments held by this swapfile.
1304 */
1305 swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
1306 memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);
1307
1308 swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
1309 memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));
1310
1311 /*
1312 * passing a NULL trim_list into vnode_trim_list
1313 * will return ENOTSUP if trim isn't supported
1314 * and 0 if it is
1315 */
1316 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1317 swp_trim_supported = TRUE;
1318 }
1319
1320 lck_mtx_lock(&vm_swap_data_lock);
1321
1322 swf->swp_flags = SWAP_READY;
1323
1324 if (swap_file_reuse == FALSE) {
1325 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1326 }
1327
1328 vm_num_swap_files++;
1329
1330 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1331
1332 if (swap_file_pin == TRUE) {
1333 vm_num_pinned_swap_files++;
1334 swf->swp_flags |= SWAP_PINNED;
1335 vm_swappin_avail -= swf->swp_size;
1336 }
1337
1338 lck_mtx_unlock(&vm_swap_data_lock);
1339
1340 thread_wakeup((event_t) &vm_num_swap_files);
1341 #if CONFIG_EMBEDDED
1342 if (vm_num_swap_files == 1) {
1343 c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;
1344
1345 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1346 c_overage_swapped_limit /= 2;
1347 }
1348 }
1349 #endif
1350 break;
1351 } else {
1352 size = size / 2;
1353 }
1354 }
1355 if (swap_file_created == FALSE) {
1356 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1357
1358 swf->swp_vp = NULL;
1359
1360 if (swap_file_reuse == FALSE) {
1361 kfree(swf->swp_path, swf->swp_pathlen);
1362 kfree(swf, sizeof *swf);
1363 }
1364 }
1365 return swap_file_created;
1366 }
1367
1368
1369 kern_return_t
1370 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1371 {
1372 struct swapfile *swf = NULL;
1373 uint64_t file_offset = 0;
1374 int retval = 0;
1375
1376 assert(c_seg->c_store.c_buffer);
1377
1378 lck_mtx_lock(&vm_swap_data_lock);
1379
1380 swf = vm_swapfile_for_handle(f_offset);
1381
1382 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1383 vm_swap_get_failures++;
1384 retval = 1;
1385 goto done;
1386 }
1387 swf->swp_io_count++;
1388
1389 lck_mtx_unlock(&vm_swap_data_lock);
1390
1391 #if DEVELOPMENT || DEBUG
1392 C_SEG_MAKE_WRITEABLE(c_seg);
1393 #endif
1394 file_offset = (f_offset & SWAP_SLOT_MASK);
1395 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1396
1397 #if DEVELOPMENT || DEBUG
1398 C_SEG_WRITE_PROTECT(c_seg);
1399 #endif
1400 if (retval == 0) {
1401 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
1402 } else {
1403 vm_swap_get_failures++;
1404 }
1405
1406 /*
1407 * Free this slot in the swap structure.
1408 */
1409 vm_swap_free(f_offset);
1410
1411 lck_mtx_lock(&vm_swap_data_lock);
1412 swf->swp_io_count--;
1413
1414 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1415 swf->swp_flags &= ~SWAP_WANTED;
1416 thread_wakeup((event_t) &swf->swp_flags);
1417 }
1418 done:
1419 lck_mtx_unlock(&vm_swap_data_lock);
1420
1421 if (retval == 0) {
1422 return KERN_SUCCESS;
1423 } else {
1424 return KERN_FAILURE;
1425 }
1426 }
1427
1428 kern_return_t
1429 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1430 {
1431 unsigned int segidx = 0;
1432 struct swapfile *swf = NULL;
1433 uint64_t file_offset = 0;
1434 uint64_t swapfile_index = 0;
1435 unsigned int byte_for_segidx = 0;
1436 unsigned int offset_within_byte = 0;
1437 boolean_t swf_eligible = FALSE;
1438 boolean_t waiting = FALSE;
1439 boolean_t retried = FALSE;
1440 int error = 0;
1441 clock_sec_t sec;
1442 clock_nsec_t nsec;
1443 void *upl_ctx = NULL;
1444
1445 if (addr == 0 || f_offset == NULL) {
1446 return KERN_FAILURE;
1447 }
1448 retry:
1449 lck_mtx_lock(&vm_swap_data_lock);
1450
1451 swf = (struct swapfile*) queue_first(&swf_global_queue);
1452
1453 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1454 segidx = swf->swp_free_hint;
1455
1456 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1457
1458 if (swf_eligible) {
1459 while (segidx < swf->swp_nsegs) {
1460 byte_for_segidx = segidx >> 3;
1461 offset_within_byte = segidx % 8;
1462
1463 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1464 segidx++;
1465 continue;
1466 }
1467
1468 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1469
1470 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1471 swf->swp_nseginuse++;
1472 swf->swp_io_count++;
1473 swf->swp_csegs[segidx] = c_seg;
1474
1475 swapfile_index = swf->swp_index;
1476 vm_swapfile_total_segs_used++;
1477
1478 clock_get_system_nanotime(&sec, &nsec);
1479
1480 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1481 thread_wakeup((event_t) &vm_swapfile_create_needed);
1482 }
1483
1484 lck_mtx_unlock(&vm_swap_data_lock);
1485
1486 goto issue_io;
1487 }
1488 }
1489 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1490 }
1491 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1492
1493 /*
1494 * we've run out of swap segments, but may not
1495 * be in a position to immediately create a new swap
1496 * file if we've recently failed to create due to a lack
1497 * of free space in the root filesystem... we'll try
1498 * to kick that create off, but in any event we're going
1499 * to take a breather (up to 1 second) so that we're not caught in a tight
1500 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1501 * segments into swap files only to have them immediately put back
1502 * on the c_age queue due to vm_swap_put failing.
1503 *
1504 * if we're doing these puts due to a hibernation flush,
1505 * no need to block... setting hibernate_no_swapspace to TRUE,
1506 * will cause "vm_compressor_compact_and_swap" to immediately abort
1507 */
1508 clock_get_system_nanotime(&sec, &nsec);
1509
1510 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1511 thread_wakeup((event_t) &vm_swapfile_create_needed);
1512 }
1513
1514 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1515 waiting = TRUE;
1516 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1517 } else {
1518 hibernate_no_swapspace = TRUE;
1519 }
1520
1521 lck_mtx_unlock(&vm_swap_data_lock);
1522
1523 if (waiting == TRUE) {
1524 thread_block(THREAD_CONTINUE_NULL);
1525
1526 if (retried == FALSE && hibernate_flushing == TRUE) {
1527 retried = TRUE;
1528 goto retry;
1529 }
1530 }
1531 vm_swap_put_failures_no_swap_file++;
1532
1533 return KERN_FAILURE;
1534
1535 issue_io:
1536 assert(c_seg->c_busy_swapping);
1537 assert(c_seg->c_busy);
1538 assert(!c_seg->c_on_minorcompact_q);
1539
1540 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1541
1542 if (soc) {
1543 soc->swp_c_seg = c_seg;
1544 soc->swp_c_size = size;
1545
1546 soc->swp_swf = swf;
1547
1548 soc->swp_io_error = 0;
1549 soc->swp_io_done = 0;
1550
1551 upl_ctx = (void *)&soc->swp_upl_ctx;
1552 }
1553 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1554
1555 if (error || upl_ctx == NULL) {
1556 return vm_swap_put_finish(swf, f_offset, error);
1557 }
1558
1559 return KERN_SUCCESS;
1560 }
1561
1562 kern_return_t
1563 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error)
1564 {
1565 lck_mtx_lock(&vm_swap_data_lock);
1566
1567 swf->swp_io_count--;
1568
1569 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1570 swf->swp_flags &= ~SWAP_WANTED;
1571 thread_wakeup((event_t) &swf->swp_flags);
1572 }
1573 lck_mtx_unlock(&vm_swap_data_lock);
1574
1575 if (error) {
1576 vm_swap_free(*f_offset);
1577 vm_swap_put_failures++;
1578
1579 return KERN_FAILURE;
1580 }
1581 return KERN_SUCCESS;
1582 }
1583
1584
1585 static void
1586 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1587 {
1588 uint64_t file_offset = 0;
1589 unsigned int segidx = 0;
1590
1591
1592 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1593 unsigned int byte_for_segidx = 0;
1594 unsigned int offset_within_byte = 0;
1595
1596 file_offset = (f_offset & SWAP_SLOT_MASK);
1597 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1598
1599 byte_for_segidx = segidx >> 3;
1600 offset_within_byte = segidx % 8;
1601
1602 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1603 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1604
1605 swf->swp_csegs[segidx] = NULL;
1606
1607 swf->swp_nseginuse--;
1608 vm_swapfile_total_segs_used--;
1609
1610 if (segidx < swf->swp_free_hint) {
1611 swf->swp_free_hint = segidx;
1612 }
1613 }
1614 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1615 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1616 }
1617 }
1618 }
1619
1620
1621 uint32_t vm_swap_free_now_count = 0;
1622 uint32_t vm_swap_free_delayed_count = 0;
1623
1624
1625 void
1626 vm_swap_free(uint64_t f_offset)
1627 {
1628 struct swapfile *swf = NULL;
1629 struct trim_list *tl = NULL;
1630 clock_sec_t sec;
1631 clock_nsec_t nsec;
1632
1633 if (swp_trim_supported == TRUE) {
1634 tl = kalloc(sizeof(struct trim_list));
1635 }
1636
1637 lck_mtx_lock(&vm_swap_data_lock);
1638
1639 swf = vm_swapfile_for_handle(f_offset);
1640
1641 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1642 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1643 /*
1644 * don't delay the free if the underlying disk doesn't support
1645 * trim, or we're in the midst of reclaiming this swap file since
1646 * we don't want to move segments that are technically free
1647 * but not yet handled by the delayed free mechanism
1648 */
1649 vm_swap_free_now(swf, f_offset);
1650
1651 vm_swap_free_now_count++;
1652 goto done;
1653 }
1654 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1655 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1656
1657 tl->tl_next = swf->swp_delayed_trim_list_head;
1658 swf->swp_delayed_trim_list_head = tl;
1659 swf->swp_delayed_trim_count++;
1660 tl = NULL;
1661
1662 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1663 clock_get_system_nanotime(&sec, &nsec);
1664
1665 if (sec > dont_trim_until_ts) {
1666 thread_wakeup((event_t) &vm_swapfile_create_needed);
1667 }
1668 }
1669 vm_swap_free_delayed_count++;
1670 }
1671 done:
1672 lck_mtx_unlock(&vm_swap_data_lock);
1673
1674 if (tl != NULL) {
1675 kfree(tl, sizeof(struct trim_list));
1676 }
1677 }
1678
1679
1680 static void
1681 vm_swap_wait_on_trim_handling_in_progress()
1682 {
1683 while (delayed_trim_handling_in_progress == TRUE) {
1684 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1685 lck_mtx_unlock(&vm_swap_data_lock);
1686
1687 thread_block(THREAD_CONTINUE_NULL);
1688
1689 lck_mtx_lock(&vm_swap_data_lock);
1690 }
1691 }
1692
1693
1694 static void
1695 vm_swap_handle_delayed_trims(boolean_t force_now)
1696 {
1697 struct swapfile *swf = NULL;
1698
1699 /*
1700 * serialize the race between us and vm_swap_reclaim...
1701 * if vm_swap_reclaim wins it will turn off SWAP_READY
1702 * on the victim it has chosen... we can just skip over
1703 * that file since vm_swap_reclaim will first process
1704 * all of the delayed trims associated with it
1705 */
1706 lck_mtx_lock(&vm_swap_data_lock);
1707
1708 delayed_trim_handling_in_progress = TRUE;
1709
1710 lck_mtx_unlock(&vm_swap_data_lock);
1711
1712 /*
1713 * no need to hold the lock to walk the swf list since
1714 * vm_swap_create (the only place where we add to this list)
1715 * is run on the same thread as this function
1716 * and vm_swap_reclaim doesn't remove items from this list
1717 * instead marking them with SWAP_REUSE for future re-use
1718 */
1719 swf = (struct swapfile*) queue_first(&swf_global_queue);
1720
1721 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1722 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1723 assert(!(swf->swp_flags & SWAP_RECLAIM));
1724 vm_swap_do_delayed_trim(swf);
1725 }
1726 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1727 }
1728 lck_mtx_lock(&vm_swap_data_lock);
1729
1730 delayed_trim_handling_in_progress = FALSE;
1731 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1732
1733 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1734 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1735 }
1736
1737 lck_mtx_unlock(&vm_swap_data_lock);
1738 }
1739
1740 static void
1741 vm_swap_do_delayed_trim(struct swapfile *swf)
1742 {
1743 struct trim_list *tl, *tl_head;
1744
1745 lck_mtx_lock(&vm_swap_data_lock);
1746
1747 tl_head = swf->swp_delayed_trim_list_head;
1748 swf->swp_delayed_trim_list_head = NULL;
1749 swf->swp_delayed_trim_count = 0;
1750
1751 lck_mtx_unlock(&vm_swap_data_lock);
1752
1753 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1754
1755 while ((tl = tl_head) != NULL) {
1756 unsigned int segidx = 0;
1757 unsigned int byte_for_segidx = 0;
1758 unsigned int offset_within_byte = 0;
1759
1760 lck_mtx_lock(&vm_swap_data_lock);
1761
1762 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1763
1764 byte_for_segidx = segidx >> 3;
1765 offset_within_byte = segidx % 8;
1766
1767 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1768 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1769
1770 swf->swp_csegs[segidx] = NULL;
1771
1772 swf->swp_nseginuse--;
1773 vm_swapfile_total_segs_used--;
1774
1775 if (segidx < swf->swp_free_hint) {
1776 swf->swp_free_hint = segidx;
1777 }
1778 }
1779 lck_mtx_unlock(&vm_swap_data_lock);
1780
1781 tl_head = tl->tl_next;
1782
1783 kfree(tl, sizeof(struct trim_list));
1784 }
1785 }
1786
1787
/*
 * Intentionally empty: there is nothing to flush here.
 */
void
vm_swap_flush()
{
}
1793
1794 int vm_swap_reclaim_yielded = 0;
1795
1796 void
1797 vm_swap_reclaim(void)
1798 {
1799 vm_offset_t addr = 0;
1800 unsigned int segidx = 0;
1801 uint64_t f_offset = 0;
1802 struct swapfile *swf = NULL;
1803 struct swapfile *smallest_swf = NULL;
1804 unsigned int min_nsegs = 0;
1805 unsigned int byte_for_segidx = 0;
1806 unsigned int offset_within_byte = 0;
1807 uint32_t c_size = 0;
1808
1809 c_segment_t c_seg = NULL;
1810
1811 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
1812 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1813 }
1814
1815 lck_mtx_lock(&vm_swap_data_lock);
1816
1817 /*
1818 * if we're running the swapfile list looking for
1819 * candidates with delayed trims, we need to
1820 * wait before making our decision concerning
1821 * the swapfile we want to reclaim
1822 */
1823 vm_swap_wait_on_trim_handling_in_progress();
1824
1825 /*
1826 * from here until we knock down the SWAP_READY bit,
1827 * we need to remain behind the vm_swap_data_lock...
1828 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1829 * will not consider this swapfile for processing
1830 */
1831 swf = (struct swapfile*) queue_first(&swf_global_queue);
1832 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1833 smallest_swf = NULL;
1834
1835 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1836 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1837 smallest_swf = swf;
1838 min_nsegs = swf->swp_nseginuse;
1839 }
1840 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1841 }
1842
1843 if (smallest_swf == NULL) {
1844 goto done;
1845 }
1846
1847 swf = smallest_swf;
1848
1849
1850 swf->swp_flags &= ~SWAP_READY;
1851 swf->swp_flags |= SWAP_RECLAIM;
1852
1853 if (swf->swp_delayed_trim_count) {
1854 lck_mtx_unlock(&vm_swap_data_lock);
1855
1856 vm_swap_do_delayed_trim(swf);
1857
1858 lck_mtx_lock(&vm_swap_data_lock);
1859 }
1860 segidx = 0;
1861
1862 while (segidx < swf->swp_nsegs) {
1863 ReTry_for_cseg:
1864 /*
1865 * Wait for outgoing I/Os.
1866 */
1867 while (swf->swp_io_count) {
1868 swf->swp_flags |= SWAP_WANTED;
1869
1870 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1871 lck_mtx_unlock(&vm_swap_data_lock);
1872
1873 thread_block(THREAD_CONTINUE_NULL);
1874
1875 lck_mtx_lock(&vm_swap_data_lock);
1876 }
1877 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1878 vm_swap_reclaim_yielded++;
1879 break;
1880 }
1881
1882 byte_for_segidx = segidx >> 3;
1883 offset_within_byte = segidx % 8;
1884
1885 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
1886 segidx++;
1887 continue;
1888 }
1889
1890 c_seg = swf->swp_csegs[segidx];
1891 assert(c_seg);
1892
1893 lck_mtx_lock_spin_always(&c_seg->c_lock);
1894
1895 if (c_seg->c_busy) {
1896 /*
1897 * a swapped out c_segment in the process of being freed will remain in the
1898 * busy state until after the vm_swap_free is called on it... vm_swap_free
1899 * takes the vm_swap_data_lock, so can't change the swap state until after
1900 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
1901 * which will allow c_seg_free_locked to clear busy and wake up this thread...
1902 * at that point, we re-look up the swap state which will now indicate that
1903 * this c_segment no longer exists.
1904 */
1905 c_seg->c_wanted = 1;
1906
1907 assert_wait((event_t) (c_seg), THREAD_UNINT);
1908 lck_mtx_unlock_always(&c_seg->c_lock);
1909
1910 lck_mtx_unlock(&vm_swap_data_lock);
1911
1912 thread_block(THREAD_CONTINUE_NULL);
1913
1914 lck_mtx_lock(&vm_swap_data_lock);
1915
1916 goto ReTry_for_cseg;
1917 }
1918 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1919
1920 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1921
1922 assert(c_seg == swf->swp_csegs[segidx]);
1923 swf->swp_csegs[segidx] = NULL;
1924 swf->swp_nseginuse--;
1925
1926 vm_swapfile_total_segs_used--;
1927
1928 lck_mtx_unlock(&vm_swap_data_lock);
1929
1930 assert(C_SEG_IS_ONDISK(c_seg));
1931
1932 C_SEG_BUSY(c_seg);
1933 c_seg->c_busy_swapping = 1;
1934 #if !CHECKSUM_THE_SWAP
1935 c_seg_trim_tail(c_seg);
1936 #endif
1937 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1938
1939 assert(c_size <= C_SEG_BUFSIZE && c_size);
1940
1941 lck_mtx_unlock_always(&c_seg->c_lock);
1942
1943 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
1944 /*
1945 * reading the data back in failed, so convert c_seg
1946 * to a swapped in c_segment that contains no data
1947 */
1948 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
1949 /*
1950 * returns with c_busy_swapping cleared
1951 */
1952
1953 vm_swap_get_failures++;
1954 goto swap_io_failed;
1955 }
1956 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
1957
1958 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
1959 vm_offset_t c_buffer;
1960
1961 /*
1962 * the put failed, so convert c_seg to a fully swapped in c_segment
1963 * with valid data
1964 */
1965 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
1966
1967 kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1968
1969 memcpy((char *)c_buffer, (char *)addr, c_size);
1970
1971 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
1972 #if ENCRYPTED_SWAP
1973 vm_swap_decrypt(c_seg);
1974 #endif /* ENCRYPTED_SWAP */
1975 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
1976 /*
1977 * returns with c_busy_swapping cleared
1978 */
1979 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
1980
1981 goto swap_io_failed;
1982 }
1983 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
1984
1985 lck_mtx_lock_spin_always(&c_seg->c_lock);
1986
1987 assert(C_SEG_IS_ONDISK(c_seg));
1988 /*
1989 * The c_seg will now know about the new location on disk.
1990 */
1991 c_seg->c_store.c_swap_handle = f_offset;
1992
1993 assert(c_seg->c_busy_swapping);
1994 c_seg->c_busy_swapping = 0;
1995 swap_io_failed:
1996 assert(c_seg->c_busy);
1997 C_SEG_WAKEUP_DONE(c_seg);
1998
1999 lck_mtx_unlock_always(&c_seg->c_lock);
2000 lck_mtx_lock(&vm_swap_data_lock);
2001 }
2002
2003 if (swf->swp_nseginuse) {
2004 swf->swp_flags &= ~SWAP_RECLAIM;
2005 swf->swp_flags |= SWAP_READY;
2006
2007 goto done;
2008 }
2009 /*
2010 * We don't remove this inactive swf from the queue.
2011 * That way, we can re-use it when needed again and
2012 * preserve the namespace. The delayed_trim processing
2013 * is also dependent on us not removing swfs from the queue.
2014 */
2015 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2016
2017 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2018
2019 lck_mtx_unlock(&vm_swap_data_lock);
2020
2021 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2022
2023 kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
2024 kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2025
2026 lck_mtx_lock(&vm_swap_data_lock);
2027
2028 if (swf->swp_flags & SWAP_PINNED) {
2029 vm_num_pinned_swap_files--;
2030 vm_swappin_avail += swf->swp_size;
2031 }
2032
2033 swf->swp_vp = NULL;
2034 swf->swp_size = 0;
2035 swf->swp_free_hint = 0;
2036 swf->swp_nsegs = 0;
2037 swf->swp_flags = SWAP_REUSE;
2038
2039 vm_num_swap_files--;
2040
2041 done:
2042 thread_wakeup((event_t) &swf->swp_flags);
2043 lck_mtx_unlock(&vm_swap_data_lock);
2044
2045 kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
2046 }
2047
2048
2049 uint64_t
2050 vm_swap_get_total_space(void)
2051 {
2052 uint64_t total_space = 0;
2053
2054 total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
2055
2056 return total_space;
2057 }
2058
2059 uint64_t
2060 vm_swap_get_used_space(void)
2061 {
2062 uint64_t used_space = 0;
2063
2064 used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
2065
2066 return used_space;
2067 }
2068
2069 uint64_t
2070 vm_swap_get_free_space(void)
2071 {
2072 return vm_swap_get_total_space() - vm_swap_get_used_space();
2073 }
2074
2075
2076 int
2077 vm_swap_low_on_space(void)
2078 {
2079 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2080 return 0;
2081 }
2082
2083 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
2084 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2085 return 0;
2086 }
2087
2088 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2089 return 1;
2090 }
2091 }
2092 return 0;
2093 }
2094
2095 boolean_t
2096 vm_swap_files_pinned(void)
2097 {
2098 boolean_t result;
2099
2100 if (vm_swappin_enabled == FALSE) {
2101 return TRUE;
2102 }
2103
2104 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2105
2106 return result;
2107 }
2108
2109 #if CONFIG_FREEZE
2110 boolean_t
2111 vm_swap_max_budget(uint64_t *freeze_daily_budget)
2112 {
2113 boolean_t use_device_value = FALSE;
2114 struct swapfile *swf = NULL;
2115
2116 if (vm_num_swap_files) {
2117 lck_mtx_lock(&vm_swap_data_lock);
2118
2119 swf = (struct swapfile*) queue_first(&swf_global_queue);
2120
2121 if (swf) {
2122 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
2123 if (swf->swp_flags == SWAP_READY) {
2124 assert(swf->swp_vp);
2125
2126 if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
2127 use_device_value = TRUE;
2128 }
2129 break;
2130 }
2131 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2132 }
2133 }
2134
2135 lck_mtx_unlock(&vm_swap_data_lock);
2136 } else {
2137 /*
2138 * This block is used for the initial budget value before any swap files
2139 * are created. We create a temp swap file to get the budget.
2140 */
2141
2142 struct vnode *temp_vp = NULL;
2143
2144 vm_swapfile_open(swapfilename, &temp_vp);
2145
2146 if (temp_vp) {
2147 if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
2148 use_device_value = TRUE;
2149 }
2150
2151 vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
2152 temp_vp = NULL;
2153 } else {
2154 *freeze_daily_budget = 0;
2155 }
2156 }
2157
2158 return use_device_value;
2159 }
2160 #endif /* CONFIG_FREEZE */