]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_compressor_backing_store.c
xnu-6153.41.3.tar.gz
[apple/xnu.git] / osfmk / vm / vm_compressor_backing_store.c
1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32
33 #include <IOKit/IOHibernatePrivate.h>
34
35 #include <kern/policy_internal.h>
36
37 boolean_t compressor_store_stop_compaction = FALSE;
38 boolean_t vm_swapfile_create_needed = FALSE;
39 boolean_t vm_swapfile_gc_needed = FALSE;
40
41 int vm_swapper_throttle = -1;
42 uint64_t vm_swapout_thread_id;
43
44 uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
45 uint64_t vm_swap_get_failures = 0; /* Fatal */
46 uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
47 int vm_num_swap_files_config = 0;
48 int vm_num_swap_files = 0;
49 int vm_num_pinned_swap_files = 0;
50 int vm_swapout_thread_processed_segments = 0;
51 int vm_swapout_thread_awakened = 0;
52 int vm_swapfile_create_thread_awakened = 0;
53 int vm_swapfile_create_thread_running = 0;
54 int vm_swapfile_gc_thread_awakened = 0;
55 int vm_swapfile_gc_thread_running = 0;
56
57 int64_t vm_swappin_avail = 0;
58 boolean_t vm_swappin_enabled = FALSE;
59 unsigned int vm_swapfile_total_segs_alloced = 0;
60 unsigned int vm_swapfile_total_segs_used = 0;
61
62 char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
63
64 extern vm_map_t compressor_map;
65
66
/*
 * Bit values stored in swapfile.swp_flags.
 */
#define SWAP_READY      0x1     /* Swap file is ready to be used */
#define SWAP_RECLAIM    0x2     /* Swap file is marked to be reclaimed */
#define SWAP_WANTED     0x4     /* Swap file has waiters */
#define SWAP_REUSE      0x8     /* Swap file is on the Q and has a name. Reuse after init-ing. */
#define SWAP_PINNED     0x10    /* Swap file is pinned (FusionDrive) */
72
73
74 struct swapfile {
75 queue_head_t swp_queue; /* list of swap files */
76 char *swp_path; /* saved pathname of swap file */
77 struct vnode *swp_vp; /* backing vnode */
78 uint64_t swp_size; /* size of this swap file */
79 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
80 unsigned int swp_pathlen; /* length of pathname */
81 unsigned int swp_nsegs; /* #segments we can use */
82 unsigned int swp_nseginuse; /* #segments in use */
83 unsigned int swp_index; /* index of this swap file */
84 unsigned int swp_flags; /* state of swap file */
85 unsigned int swp_free_hint; /* offset of 1st free chunk */
86 unsigned int swp_io_count; /* count of outstanding I/Os */
87 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
88
89 struct trim_list *swp_delayed_trim_list_head;
90 unsigned int swp_delayed_trim_count;
91 };
92
93 queue_head_t swf_global_queue;
94 boolean_t swp_trim_supported = FALSE;
95
96 extern clock_sec_t dont_trim_until_ts;
97 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
98 clock_sec_t vm_swapfile_last_successful_create_ts = 0;
99 int vm_swapfile_can_be_created = FALSE;
100 boolean_t delayed_trim_handling_in_progress = FALSE;
101
102 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
103
104 static void vm_swapout_thread_throttle_adjust(void);
105 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
106 static void vm_swapout_thread(void);
107 static void vm_swapfile_create_thread(void);
108 static void vm_swapfile_gc_thread(void);
109 static void vm_swap_defragment(void);
110 static void vm_swap_handle_delayed_trims(boolean_t);
111 static void vm_swap_do_delayed_trim(struct swapfile *);
112 static void vm_swap_wait_on_trim_handling_in_progress(void);
113
114
115 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
116
/*
 * Tunables and policy predicates for the swap subsystem.
 *
 * The platform-specific part (maximum number of swap files, trim batch
 * size, defragmentation ratio, pinning policy) lives inside the
 * CONFIG_EMBEDDED conditional.  Predicates that are identical on every
 * platform are defined once below it.  All macro parameters are
 * parenthesized so that arbitrary expressions can be passed safely.
 */
#if CONFIG_EMBEDDED

#if DEVELOPMENT || DEBUG
#define VM_MAX_SWAP_FILE_NUM            100
#else /* DEVELOPMENT || DEBUG */
#define VM_MAX_SWAP_FILE_NUM            5
#endif /* DEVELOPMENT || DEBUG */

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE

#else /* CONFIG_EMBEDDED */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))

#endif /* CONFIG_EMBEDDED */

/*
 * Create another swap file when we are below the configured file count,
 * the pool of free segments has dropped under the high-water mark, and
 * more than VM_SWAPFILE_DELAYED_CREATE seconds have passed since the
 * last failed attempt.
 */
#define VM_SWAP_SHOULD_CREATE(cur_ts)   (((vm_num_swap_files < vm_num_swap_files_config) && \
	                                ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
	                                (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE)) ? 1 : 0)

/* A swap file is due for trimming once its delayed-trim backlog reaches the batch limit. */
#define VM_SWAP_SHOULD_TRIM(swf)        ((((swf)->swp_delayed_trim_count) >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)
#define VM_SWAPFILE_DELAYED_CREATE      15      /* seconds; compared against clock_sec_t values */

/* Swapout work is queued and the swapper is running at its least-throttled tier. */
#define VM_SWAP_BUSY()                  ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
151
152
#if CHECKSUM_THE_SWAP
/* hash applied to a segment's buffer before it is written to swap */
extern unsigned int hash_string(char *cp, int len);
#endif

#if RECORD_THE_COMPRESSED_DATA
/* state for the optional on-disk record of compressed data */
boolean_t	c_compressed_record_init_done = FALSE;
int		c_compressed_record_write_error = 0;
struct vnode	*c_compressed_record_vp = NULL;
uint64_t	c_compressed_record_file_offset = 0;

void		c_compressed_record_init(void);
void		c_compressed_record_write(char *, int);
#endif

extern void	vm_pageout_io_throttle(void);

static struct swapfile	*vm_swapfile_for_handle(uint64_t);
169
170 /*
171 * Called with the vm_swap_data_lock held.
172 */
173
174 static struct swapfile *
175 vm_swapfile_for_handle(uint64_t f_offset)
176 {
177 uint64_t file_offset = 0;
178 unsigned int swapfile_index = 0;
179 struct swapfile* swf = NULL;
180
181 file_offset = (f_offset & SWAP_SLOT_MASK);
182 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
183
184 swf = (struct swapfile*) queue_first(&swf_global_queue);
185
186 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
187 if (swapfile_index == swf->swp_index) {
188 break;
189 }
190
191 swf = (struct swapfile*) queue_next(&swf->swp_queue);
192 }
193
194 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
195 swf = NULL;
196 }
197
198 return swf;
199 }
200
201 #if ENCRYPTED_SWAP
202
203 #include <libkern/crypto/aesxts.h>
204
205 extern int cc_rand_generate(void *, size_t); /* from libkern/cyrpto/rand.h> */
206
207 boolean_t swap_crypt_initialized;
208 void swap_crypt_initialize(void);
209
210 symmetric_xts xts_modectx;
211 uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
212 uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
213
214 #if DEVELOPMENT || DEBUG
215 boolean_t swap_crypt_xts_tested = FALSE;
216 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
217 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
218 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
219 #endif /* DEVELOPMENT || DEBUG */
220
221 unsigned long vm_page_encrypt_counter;
222 unsigned long vm_page_decrypt_counter;
223
224
225 void
226 swap_crypt_initialize(void)
227 {
228 uint8_t *enckey1, *enckey2;
229 int keylen1, keylen2;
230 int error;
231
232 assert(swap_crypt_initialized == FALSE);
233
234 keylen1 = sizeof(swap_crypt_key1);
235 enckey1 = (uint8_t *)&swap_crypt_key1;
236 keylen2 = sizeof(swap_crypt_key2);
237 enckey2 = (uint8_t *)&swap_crypt_key2;
238
239 error = cc_rand_generate((void *)enckey1, keylen1);
240 assert(!error);
241
242 error = cc_rand_generate((void *)enckey2, keylen2);
243 assert(!error);
244
245 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
246 assert(!error);
247
248 swap_crypt_initialized = TRUE;
249
250 #if DEVELOPMENT || DEBUG
251 uint8_t *encptr;
252 uint8_t *decptr;
253 uint8_t *refptr;
254 uint8_t *iv;
255 uint64_t ivnum[2];
256 int size = 0;
257 int i = 0;
258 int rc = 0;
259
260 assert(swap_crypt_xts_tested == FALSE);
261
262 /*
263 * Validate the encryption algorithms.
264 *
265 * First initialize the test data.
266 */
267 for (i = 0; i < 4096; i++) {
268 swap_crypt_test_page_ref[i] = (char) i;
269 }
270 ivnum[0] = (uint64_t)0xaa;
271 ivnum[1] = 0;
272 iv = (uint8_t *)ivnum;
273
274 refptr = (uint8_t *)swap_crypt_test_page_ref;
275 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
276 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
277 size = 4096;
278
279 /* encrypt */
280 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
281 assert(!rc);
282
283 /* compare result with original - should NOT match */
284 for (i = 0; i < 4096; i++) {
285 if (swap_crypt_test_page_encrypt[i] !=
286 swap_crypt_test_page_ref[i]) {
287 break;
288 }
289 }
290 assert(i != 4096);
291
292 /* decrypt */
293 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
294 assert(!rc);
295
296 /* compare result with original */
297 for (i = 0; i < 4096; i++) {
298 if (swap_crypt_test_page_decrypt[i] !=
299 swap_crypt_test_page_ref[i]) {
300 panic("encryption test failed");
301 }
302 }
303 /* encrypt in place */
304 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
305 assert(!rc);
306
307 /* decrypt in place */
308 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
309 assert(!rc);
310
311 for (i = 0; i < 4096; i++) {
312 if (swap_crypt_test_page_decrypt[i] !=
313 swap_crypt_test_page_ref[i]) {
314 panic("in place encryption test failed");
315 }
316 }
317 swap_crypt_xts_tested = TRUE;
318 #endif /* DEVELOPMENT || DEBUG */
319 }
320
321
322 void
323 vm_swap_encrypt(c_segment_t c_seg)
324 {
325 uint8_t *ptr;
326 uint8_t *iv;
327 uint64_t ivnum[2];
328 int size = 0;
329 int rc = 0;
330
331 if (swap_crypt_initialized == FALSE) {
332 swap_crypt_initialize();
333 }
334
335 #if DEVELOPMENT || DEBUG
336 C_SEG_MAKE_WRITEABLE(c_seg);
337 #endif
338 ptr = (uint8_t *)c_seg->c_store.c_buffer;
339 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
340
341 ivnum[0] = (uint64_t)c_seg;
342 ivnum[1] = 0;
343 iv = (uint8_t *)ivnum;
344
345 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
346 assert(!rc);
347
348 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
349
350 #if DEVELOPMENT || DEBUG
351 C_SEG_WRITE_PROTECT(c_seg);
352 #endif
353 }
354
355 void
356 vm_swap_decrypt(c_segment_t c_seg)
357 {
358 uint8_t *ptr;
359 uint8_t *iv;
360 uint64_t ivnum[2];
361 int size = 0;
362 int rc = 0;
363
364 assert(swap_crypt_initialized);
365
366 #if DEVELOPMENT || DEBUG
367 C_SEG_MAKE_WRITEABLE(c_seg);
368 #endif
369 ptr = (uint8_t *)c_seg->c_store.c_buffer;
370 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
371
372 ivnum[0] = (uint64_t)c_seg;
373 ivnum[1] = 0;
374 iv = (uint8_t *)ivnum;
375
376 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
377 assert(!rc);
378
379 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
380
381 #if DEVELOPMENT || DEBUG
382 C_SEG_WRITE_PROTECT(c_seg);
383 #endif
384 }
385 #endif /* ENCRYPTED_SWAP */
386
387
388 void
389 vm_compressor_swap_init()
390 {
391 thread_t thread = NULL;
392
393 lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
394 lck_grp_init(&vm_swap_data_lock_grp,
395 "vm_swap_data",
396 &vm_swap_data_lock_grp_attr);
397 lck_attr_setdefault(&vm_swap_data_lock_attr);
398 lck_mtx_init_ext(&vm_swap_data_lock,
399 &vm_swap_data_lock_ext,
400 &vm_swap_data_lock_grp,
401 &vm_swap_data_lock_attr);
402
403 queue_init(&swf_global_queue);
404
405
406 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
407 BASEPRI_VM, &thread) != KERN_SUCCESS) {
408 panic("vm_swapout_thread: create failed");
409 }
410 thread_set_thread_name(thread, "VM_swapout");
411 vm_swapout_thread_id = thread->thread_id;
412
413 thread_deallocate(thread);
414
415 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
416 BASEPRI_VM, &thread) != KERN_SUCCESS) {
417 panic("vm_swapfile_create_thread: create failed");
418 }
419
420 thread_set_thread_name(thread, "VM_swapfile_create");
421 thread_deallocate(thread);
422
423 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
424 BASEPRI_VM, &thread) != KERN_SUCCESS) {
425 panic("vm_swapfile_gc_thread: create failed");
426 }
427 thread_set_thread_name(thread, "VM_swapfile_gc");
428
429 /*
430 * Swapfile garbage collection will need to allocate memory
431 * to complete its swap reclaim and in-memory compaction.
432 * So allow it to dip into the reserved VM page pool.
433 */
434 thread_lock(thread);
435 thread->options |= TH_OPT_VMPRIV;
436 thread_unlock(thread);
437
438 thread_deallocate(thread);
439
440 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
441 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
442 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
443 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
444
445 #if CONFIG_EMBEDDED
446 /*
447 * dummy value until the swap file gets created
448 * when we drive the first c_segment_t to the
449 * swapout queue... at that time we will
450 * know the true size we have to work with
451 */
452 c_overage_swapped_limit = 16;
453 #endif
454
455 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
456
457 printf("VM Swap Subsystem is ON\n");
458 }
459
460
461 #if RECORD_THE_COMPRESSED_DATA
462
463 void
464 c_compressed_record_init()
465 {
466 if (c_compressed_record_init_done == FALSE) {
467 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
468 c_compressed_record_init_done = TRUE;
469 }
470 }
471
472 void
473 c_compressed_record_write(char *buf, int size)
474 {
475 if (c_compressed_record_write_error == 0) {
476 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
477 c_compressed_record_file_offset += size;
478 }
479 }
480 #endif
481
482
483 int compaction_swapper_inited = 0;
484
485 void
486 vm_compaction_swapper_do_init(void)
487 {
488 struct vnode *vp;
489 char *pathname;
490 int namelen;
491
492 if (compaction_swapper_inited) {
493 return;
494 }
495
496 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
497 compaction_swapper_inited = 1;
498 return;
499 }
500 lck_mtx_lock(&vm_swap_data_lock);
501
502 if (!compaction_swapper_inited) {
503 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
504 pathname = (char*)kalloc(namelen);
505 memset(pathname, 0, namelen);
506 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
507
508 vm_swapfile_open(pathname, &vp);
509
510 if (vp) {
511 if (vnode_pager_isSSD(vp) == FALSE) {
512 /*
513 * swap files live on an HDD, so let's make sure to start swapping
514 * much earlier since we're not worried about SSD write-wear and
515 * we have so little write bandwidth to work with
516 * these values were derived expermentially by running the performance
517 * teams stock test for evaluating HDD performance against various
518 * combinations and looking and comparing overall results.
519 * Note that the > relationship between these 4 values must be maintained
520 */
521 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
522 vm_compressor_minorcompact_threshold_divisor = 15;
523 }
524 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
525 vm_compressor_majorcompact_threshold_divisor = 18;
526 }
527 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
528 vm_compressor_unthrottle_threshold_divisor = 24;
529 }
530 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
531 vm_compressor_catchup_threshold_divisor = 30;
532 }
533 }
534 #if !CONFIG_EMBEDDED
535 vnode_setswapmount(vp);
536 vm_swappin_avail = vnode_getswappin_avail(vp);
537
538 if (vm_swappin_avail) {
539 vm_swappin_enabled = TRUE;
540 }
541 #endif
542 vm_swapfile_close((uint64_t)pathname, vp);
543 }
544 kfree(pathname, namelen);
545
546 compaction_swapper_inited = 1;
547 }
548 lck_mtx_unlock(&vm_swap_data_lock);
549 }
550
551
552 void
553 vm_swap_consider_defragmenting(int flags)
554 {
555 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
556 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
557
558 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
559 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
560 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
561 lck_mtx_lock(&vm_swap_data_lock);
562
563 if (force_defrag) {
564 vm_swap_force_defrag = TRUE;
565 }
566
567 if (force_reclaim) {
568 vm_swap_force_reclaim = TRUE;
569 }
570
571 if (!vm_swapfile_gc_thread_running) {
572 thread_wakeup((event_t) &vm_swapfile_gc_needed);
573 }
574
575 lck_mtx_unlock(&vm_swap_data_lock);
576 }
577 }
578 }
579
580
581 int vm_swap_defragment_yielded = 0;
582 int vm_swap_defragment_swapin = 0;
583 int vm_swap_defragment_free = 0;
584 int vm_swap_defragment_busy = 0;
585
586
587 static void
588 vm_swap_defragment()
589 {
590 c_segment_t c_seg;
591
592 /*
593 * have to grab the master lock w/o holding
594 * any locks in spin mode
595 */
596 PAGE_REPLACEMENT_DISALLOWED(TRUE);
597
598 lck_mtx_lock_spin_always(c_list_lock);
599
600 while (!queue_empty(&c_swappedout_sparse_list_head)) {
601 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
602 vm_swap_defragment_yielded++;
603 break;
604 }
605 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
606
607 lck_mtx_lock_spin_always(&c_seg->c_lock);
608
609 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
610
611 if (c_seg->c_busy) {
612 lck_mtx_unlock_always(c_list_lock);
613
614 PAGE_REPLACEMENT_DISALLOWED(FALSE);
615 /*
616 * c_seg_wait_on_busy consumes c_seg->c_lock
617 */
618 c_seg_wait_on_busy(c_seg);
619
620 PAGE_REPLACEMENT_DISALLOWED(TRUE);
621
622 lck_mtx_lock_spin_always(c_list_lock);
623
624 vm_swap_defragment_busy++;
625 continue;
626 }
627 if (c_seg->c_bytes_used == 0) {
628 /*
629 * c_seg_free_locked consumes the c_list_lock
630 * and c_seg->c_lock
631 */
632 C_SEG_BUSY(c_seg);
633 c_seg_free_locked(c_seg);
634
635 vm_swap_defragment_free++;
636 } else {
637 lck_mtx_unlock_always(c_list_lock);
638
639 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
640 lck_mtx_unlock_always(&c_seg->c_lock);
641 }
642
643 vm_swap_defragment_swapin++;
644 }
645 PAGE_REPLACEMENT_DISALLOWED(FALSE);
646
647 vm_pageout_io_throttle();
648
649 /*
650 * because write waiters have privilege over readers,
651 * dropping and immediately retaking the master lock will
652 * still allow any thread waiting to acquire the
653 * master lock exclusively an opportunity to take it
654 */
655 PAGE_REPLACEMENT_DISALLOWED(TRUE);
656
657 lck_mtx_lock_spin_always(c_list_lock);
658 }
659 lck_mtx_unlock_always(c_list_lock);
660
661 PAGE_REPLACEMENT_DISALLOWED(FALSE);
662 }
663
664
665
666 static void
667 vm_swapfile_create_thread(void)
668 {
669 clock_sec_t sec;
670 clock_nsec_t nsec;
671
672 current_thread()->options |= TH_OPT_VMPRIV;
673
674 vm_swapfile_create_thread_awakened++;
675 vm_swapfile_create_thread_running = 1;
676
677 while (TRUE) {
678 /*
679 * walk through the list of swap files
680 * and do the delayed frees/trims for
681 * any swap file whose count of delayed
682 * frees is above the batch limit
683 */
684 vm_swap_handle_delayed_trims(FALSE);
685
686 lck_mtx_lock(&vm_swap_data_lock);
687
688 if (hibernate_in_progress_with_pinned_swap == TRUE) {
689 break;
690 }
691
692 if (compressor_store_stop_compaction == TRUE) {
693 break;
694 }
695
696 clock_get_system_nanotime(&sec, &nsec);
697
698 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
699 break;
700 }
701
702 lck_mtx_unlock(&vm_swap_data_lock);
703
704 if (vm_swap_create_file() == FALSE) {
705 vm_swapfile_last_failed_to_create_ts = sec;
706 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
707 } else {
708 vm_swapfile_last_successful_create_ts = sec;
709 }
710 }
711 vm_swapfile_create_thread_running = 0;
712
713 if (hibernate_in_progress_with_pinned_swap == TRUE) {
714 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
715 }
716
717 if (compressor_store_stop_compaction == TRUE) {
718 thread_wakeup((event_t)&compressor_store_stop_compaction);
719 }
720
721 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
722
723 lck_mtx_unlock(&vm_swap_data_lock);
724
725 thread_block((thread_continue_t)vm_swapfile_create_thread);
726
727 /* NOTREACHED */
728 }
729
730
731 #if HIBERNATION
732
733 kern_return_t
734 hibernate_pin_swap(boolean_t start)
735 {
736 vm_compaction_swapper_do_init();
737
738 if (start == FALSE) {
739 lck_mtx_lock(&vm_swap_data_lock);
740 hibernate_in_progress_with_pinned_swap = FALSE;
741 lck_mtx_unlock(&vm_swap_data_lock);
742
743 return KERN_SUCCESS;
744 }
745 if (vm_swappin_enabled == FALSE) {
746 return KERN_SUCCESS;
747 }
748
749 lck_mtx_lock(&vm_swap_data_lock);
750
751 hibernate_in_progress_with_pinned_swap = TRUE;
752
753 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
754 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
755
756 lck_mtx_unlock(&vm_swap_data_lock);
757
758 thread_block(THREAD_CONTINUE_NULL);
759
760 lck_mtx_lock(&vm_swap_data_lock);
761 }
762 if (vm_num_swap_files > vm_num_pinned_swap_files) {
763 hibernate_in_progress_with_pinned_swap = FALSE;
764 lck_mtx_unlock(&vm_swap_data_lock);
765
766 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
767 vm_num_swap_files, vm_num_pinned_swap_files);
768 return KERN_FAILURE;
769 }
770 lck_mtx_unlock(&vm_swap_data_lock);
771
772 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
773 if (vm_swap_create_file() == FALSE) {
774 break;
775 }
776 }
777 return KERN_SUCCESS;
778 }
779 #endif
780
781 static void
782 vm_swapfile_gc_thread(void)
783 {
784 boolean_t need_defragment;
785 boolean_t need_reclaim;
786
787 vm_swapfile_gc_thread_awakened++;
788 vm_swapfile_gc_thread_running = 1;
789
790 while (TRUE) {
791 lck_mtx_lock(&vm_swap_data_lock);
792
793 if (hibernate_in_progress_with_pinned_swap == TRUE) {
794 break;
795 }
796
797 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
798 break;
799 }
800
801 need_defragment = FALSE;
802 need_reclaim = FALSE;
803
804 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
805 need_defragment = TRUE;
806 }
807
808 if (VM_SWAP_SHOULD_RECLAIM()) {
809 need_defragment = TRUE;
810 need_reclaim = TRUE;
811 }
812 if (need_defragment == FALSE && need_reclaim == FALSE) {
813 break;
814 }
815
816 vm_swap_force_defrag = FALSE;
817 vm_swap_force_reclaim = FALSE;
818
819 lck_mtx_unlock(&vm_swap_data_lock);
820
821 if (need_defragment == TRUE) {
822 vm_swap_defragment();
823 }
824 if (need_reclaim == TRUE) {
825 vm_swap_reclaim();
826 }
827 }
828 vm_swapfile_gc_thread_running = 0;
829
830 if (hibernate_in_progress_with_pinned_swap == TRUE) {
831 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
832 }
833
834 if (compressor_store_stop_compaction == TRUE) {
835 thread_wakeup((event_t)&compressor_store_stop_compaction);
836 }
837
838 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
839
840 lck_mtx_unlock(&vm_swap_data_lock);
841
842 thread_block((thread_continue_t)vm_swapfile_gc_thread);
843
844 /* NOTREACHED */
845 }
846
847
848
/*
 * Values assigned to vm_swapout_limit in each swapout state.
 * VM_SWAPOUT_LIMIT_MAX also sizes the vm_swapout_ctx array.
 */
#define VM_SWAPOUT_LIMIT_T2P    4
#define VM_SWAPOUT_LIMIT_T1P    4
#define VM_SWAPOUT_LIMIT_T0P    6
#define VM_SWAPOUT_LIMIT_T0     8
#define VM_SWAPOUT_LIMIT_MAX    8

/*
 * States of the swapout throttle state machine
 * (see vm_swapout_thread_throttle_adjust).
 */
#define VM_SWAPOUT_START        0
#define VM_SWAPOUT_T2_PASSIVE   1
#define VM_SWAPOUT_T1_PASSIVE   2
#define VM_SWAPOUT_T0_PASSIVE   3
#define VM_SWAPOUT_T0           4

int vm_swapout_state = VM_SWAPOUT_START;
int vm_swapout_limit = 1;

/* number of times each state has been entered */
int vm_swapper_entered_T0 = 0;
int vm_swapper_entered_T0P = 0;
int vm_swapper_entered_T1P = 0;
int vm_swapper_entered_T2P = 0;

868
869
870 static void
871 vm_swapout_thread_throttle_adjust(void)
872 {
873 switch (vm_swapout_state) {
874 case VM_SWAPOUT_START:
875
876 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
877 vm_swapper_entered_T2P++;
878
879 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
880 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
881 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
882 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
883 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
884 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
885
886 break;
887
888 case VM_SWAPOUT_T2_PASSIVE:
889
890 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
891 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
892 vm_swapper_entered_T0P++;
893
894 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
895 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
896 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
897 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
898 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
899 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
900
901 break;
902 }
903 if (swapout_target_age || hibernate_flushing == TRUE) {
904 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
905 vm_swapper_entered_T1P++;
906
907 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
908 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
909 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
910 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
911 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
912 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
913 }
914 break;
915
916 case VM_SWAPOUT_T1_PASSIVE:
917
918 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
919 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
920 vm_swapper_entered_T0P++;
921
922 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
923 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
924 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
925 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
926 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
927 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
928
929 break;
930 }
931 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
932 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
933 vm_swapper_entered_T2P++;
934
935 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
936 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
937 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
938 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
939 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
940 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
941 }
942 break;
943
944 case VM_SWAPOUT_T0_PASSIVE:
945
946 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
947 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
948 vm_swapper_entered_T2P++;
949
950 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
951 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
952 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
953 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
954 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
955 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
956
957 break;
958 }
959 if (SWAPPER_NEEDS_TO_CATCHUP()) {
960 vm_swapper_entered_T0++;
961
962 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
963 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
964 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
965 vm_swapout_state = VM_SWAPOUT_T0;
966 }
967 break;
968
969 case VM_SWAPOUT_T0:
970
971 if (SWAPPER_HAS_CAUGHTUP()) {
972 vm_swapper_entered_T0P++;
973
974 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
975 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
976 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
977 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
978 }
979 break;
980 }
981 }
982
983 int vm_swapout_found_empty = 0;
984
985 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
986
987 int vm_swapout_soc_busy = 0;
988 int vm_swapout_soc_done = 0;
989
990
991 static struct swapout_io_completion *
992 vm_swapout_find_free_soc(void)
993 {
994 int i;
995
996 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
997 if (vm_swapout_ctx[i].swp_io_busy == 0) {
998 return &vm_swapout_ctx[i];
999 }
1000 }
1001 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1002
1003 return NULL;
1004 }
1005
1006 static struct swapout_io_completion *
1007 vm_swapout_find_done_soc(void)
1008 {
1009 int i;
1010
1011 if (vm_swapout_soc_done) {
1012 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1013 if (vm_swapout_ctx[i].swp_io_done) {
1014 return &vm_swapout_ctx[i];
1015 }
1016 }
1017 }
1018 return NULL;
1019 }
1020
1021 static void
1022 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1023 {
1024 kern_return_t kr;
1025
1026 if (soc->swp_io_error) {
1027 kr = KERN_FAILURE;
1028 } else {
1029 kr = KERN_SUCCESS;
1030 }
1031
1032 lck_mtx_unlock_always(c_list_lock);
1033
1034 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error);
1035 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1036
1037 lck_mtx_lock_spin_always(c_list_lock);
1038
1039 soc->swp_io_done = 0;
1040 soc->swp_io_busy = 0;
1041
1042 vm_swapout_soc_busy--;
1043 vm_swapout_soc_done--;
1044 }
1045
1046
/*
 * Body of the swapout thread.
 *
 * While c_swapout_list_head is non-empty and fewer than vm_swapout_limit
 * completion contexts are busy, takes the first segment off the queue and
 * hands its buffer to vm_swap_put together with a free completion context.
 * Finished contexts are retired via vm_swapout_complete_soc. When there is
 * nothing left to do the thread waits on &c_swapout_list_head and blocks
 * with itself as the continuation, so this function never returns.
 */
static void
vm_swapout_thread(void)
{
	uint32_t size = 0;
	c_segment_t c_seg = NULL;
	kern_return_t kr = KERN_SUCCESS;
	struct swapout_io_completion *soc;

	current_thread()->options |= TH_OPT_VMPRIV;

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);
again:
	while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit) {
		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			/*
			 * Someone else owns the segment: drop the list lock, wait for
			 * it, then retake the list lock and re-evaluate the queue head.
			 * NOTE(review): c_seg->c_lock is not released here, so
			 * c_seg_wait_on_busy presumably drops it — confirm.
			 */
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		/* number of bytes to write, rounded up to whole pages */
		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			/* nothing populated: mark the segment empty instead of writing it */
			assert(c_seg->c_bytes_used == 0);

			if (!c_seg->c_on_minorcompact_q) {
				c_seg_need_delayed_compaction(c_seg, TRUE);
			}

			c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
			lck_mtx_unlock_always(&c_seg->c_lock);
			lck_mtx_unlock_always(c_list_lock);

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		/* claim the segment for the duration of the write */
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);

		lck_mtx_unlock_always(c_list_lock);
		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		/* the loop condition guarantees at least one context is not busy */
		soc = vm_swapout_find_free_soc();
		assert(soc);

		soc->swp_upl_ctx.io_context = (void *)soc;
		soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
		soc->swp_upl_ctx.io_error = 0;

		kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);

		if (kr != KERN_SUCCESS) {
			/*
			 * The put failed. If the completion callback already flagged
			 * this context as done, undo that bookkeeping before finishing
			 * the segment with the failure status.
			 */
			if (soc->swp_io_done) {
				lck_mtx_lock_spin_always(c_list_lock);

				soc->swp_io_done = 0;
				vm_swapout_soc_done--;

				lck_mtx_unlock_always(c_list_lock);
			}
			vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
		} else {
			/* write accepted: the context stays busy until it is retired */
			soc->swp_io_busy = 1;
			vm_swapout_soc_busy++;
		}

c_seg_is_empty:
		if (c_swapout_count == 0) {
			vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
		}

		lck_mtx_lock_spin_always(c_list_lock);

		/* retire at most one finished context per iteration */
		if ((soc = vm_swapout_find_done_soc())) {
			vm_swapout_complete_soc(soc);
		}
		lck_mtx_unlock_always(c_list_lock);

		vm_swapout_thread_throttle_adjust();
		vm_pageout_io_throttle();

		lck_mtx_lock_spin_always(c_list_lock);
	}
	/* queue drained or limit reached: retire a finished context and rescan */
	if ((soc = vm_swapout_find_done_soc())) {
		vm_swapout_complete_soc(soc);
		goto again;
	}
	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}
1165
1166
1167 void
1168 vm_swapout_iodone(void *io_context, int error)
1169 {
1170 struct swapout_io_completion *soc;
1171
1172 soc = (struct swapout_io_completion *)io_context;
1173
1174 lck_mtx_lock_spin_always(c_list_lock);
1175
1176 soc->swp_io_done = 1;
1177 soc->swp_io_error = error;
1178 vm_swapout_soc_done++;
1179
1180 thread_wakeup((event_t)&c_swapout_list_head);
1181
1182 lck_mtx_unlock_always(c_list_lock);
1183 }
1184
1185
/*
 * Finish a swapout attempt for c_seg.
 *
 * c_seg    segment that was being written; must be busy and busy_swapping.
 * f_offset swap handle to record in the segment on success.
 * size     number of bytes that were written (used for depopulation and stats).
 * kr       KERN_SUCCESS if the write succeeded, anything else for failure.
 *
 * On success the segment's buffer is depopulated and the segment is moved to
 * a swapped-out queue; on failure it is put back on the age queue. In both
 * cases c_busy_swapping is cleared and waiters on the segment are woken.
 */
static void
vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
{
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	if (kr == KERN_SUCCESS) {
		/* data is on disk: release the pages backing the segment buffer */
		kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size, KMA_COMPRESSOR);
	}
#if ENCRYPTED_SWAP
	else {
		/* buffer was encrypted before the write; undo that since it stays in memory */
		vm_swap_decrypt(c_seg);
	}
#endif /* ENCRYPTED_SWAP */
	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	if (kr == KERN_SUCCESS) {
		int new_state = C_ON_SWAPPEDOUT_Q;
		boolean_t insert_head = FALSE;

		if (hibernate_flushing == TRUE) {
			/* segments inside the "to warm" generation window go to the head of the queue */
			if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
			    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
				insert_head = TRUE;
			}
		} else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
			new_state = C_ON_SWAPPEDOUTSPARSE_Q;
		}

		c_seg_switch_state(c_seg, new_state, insert_head);

		/* remember where on disk the segment now lives */
		c_seg->c_store.c_swap_handle = f_offset;

		VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);

		if (c_seg->c_bytes_used) {
			OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
		}
	} else {
		/* write failed: the segment stays resident, so undo the overage accounting */
		if (c_seg->c_overage_swap == TRUE) {
			c_seg->c_overage_swap = FALSE;
			c_overage_swapped_count--;
		}
		c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);

		if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
			c_seg_need_delayed_compaction(c_seg, TRUE);
		}
	}
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);

	c_seg->c_busy_swapping = 0;
	lck_mtx_unlock_always(c_list_lock);

	/* wake waiters on the segment while still holding its lock */
	C_SEG_WAKEUP_DONE(c_seg);
	lck_mtx_unlock_always(&c_seg->c_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
1246
1247
1248 boolean_t
1249 vm_swap_create_file()
1250 {
1251 uint64_t size = 0;
1252 int namelen = 0;
1253 boolean_t swap_file_created = FALSE;
1254 boolean_t swap_file_reuse = FALSE;
1255 boolean_t swap_file_pin = FALSE;
1256 struct swapfile *swf = NULL;
1257
1258 /*
1259 * make sure we've got all the info we need
1260 * to potentially pin a swap file... we could
1261 * be swapping out due to hibernation w/o ever
1262 * having run vm_pageout_scan, which is normally
1263 * the trigger to do the init
1264 */
1265 vm_compaction_swapper_do_init();
1266
1267 /*
1268 * Any swapfile structure ready for re-use?
1269 */
1270
1271 lck_mtx_lock(&vm_swap_data_lock);
1272
1273 swf = (struct swapfile*) queue_first(&swf_global_queue);
1274
1275 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1276 if (swf->swp_flags == SWAP_REUSE) {
1277 swap_file_reuse = TRUE;
1278 break;
1279 }
1280 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1281 }
1282
1283 lck_mtx_unlock(&vm_swap_data_lock);
1284
1285 if (swap_file_reuse == FALSE) {
1286 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1287
1288 swf = (struct swapfile*) kalloc(sizeof *swf);
1289 memset(swf, 0, sizeof(*swf));
1290
1291 swf->swp_index = vm_num_swap_files + 1;
1292 swf->swp_pathlen = namelen;
1293 swf->swp_path = (char*)kalloc(swf->swp_pathlen);
1294
1295 memset(swf->swp_path, 0, namelen);
1296
1297 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1298 }
1299
1300 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1301
1302 if (swf->swp_vp == NULL) {
1303 if (swap_file_reuse == FALSE) {
1304 kfree(swf->swp_path, swf->swp_pathlen);
1305 kfree(swf, sizeof *swf);
1306 }
1307 return FALSE;
1308 }
1309 vm_swapfile_can_be_created = TRUE;
1310
1311 size = MAX_SWAP_FILE_SIZE;
1312
1313 while (size >= MIN_SWAP_FILE_SIZE) {
1314 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1315
1316 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1317 int num_bytes_for_bitmap = 0;
1318
1319 swap_file_created = TRUE;
1320
1321 swf->swp_size = size;
1322 swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
1323 swf->swp_nseginuse = 0;
1324 swf->swp_free_hint = 0;
1325
1326 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1327 /*
1328 * Allocate a bitmap that describes the
1329 * number of segments held by this swapfile.
1330 */
1331 swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
1332 memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);
1333
1334 swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
1335 memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));
1336
1337 /*
1338 * passing a NULL trim_list into vnode_trim_list
1339 * will return ENOTSUP if trim isn't supported
1340 * and 0 if it is
1341 */
1342 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1343 swp_trim_supported = TRUE;
1344 }
1345
1346 lck_mtx_lock(&vm_swap_data_lock);
1347
1348 swf->swp_flags = SWAP_READY;
1349
1350 if (swap_file_reuse == FALSE) {
1351 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1352 }
1353
1354 vm_num_swap_files++;
1355
1356 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1357
1358 if (swap_file_pin == TRUE) {
1359 vm_num_pinned_swap_files++;
1360 swf->swp_flags |= SWAP_PINNED;
1361 vm_swappin_avail -= swf->swp_size;
1362 }
1363
1364 lck_mtx_unlock(&vm_swap_data_lock);
1365
1366 thread_wakeup((event_t) &vm_num_swap_files);
1367 #if CONFIG_EMBEDDED
1368 if (vm_num_swap_files == 1) {
1369 c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;
1370
1371 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1372 c_overage_swapped_limit /= 2;
1373 }
1374 }
1375 #endif
1376 break;
1377 } else {
1378 size = size / 2;
1379 }
1380 }
1381 if (swap_file_created == FALSE) {
1382 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1383
1384 swf->swp_vp = NULL;
1385
1386 if (swap_file_reuse == FALSE) {
1387 kfree(swf->swp_path, swf->swp_pathlen);
1388 kfree(swf, sizeof *swf);
1389 }
1390 }
1391 return swap_file_created;
1392 }
1393
1394
1395 kern_return_t
1396 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1397 {
1398 struct swapfile *swf = NULL;
1399 uint64_t file_offset = 0;
1400 int retval = 0;
1401
1402 assert(c_seg->c_store.c_buffer);
1403
1404 lck_mtx_lock(&vm_swap_data_lock);
1405
1406 swf = vm_swapfile_for_handle(f_offset);
1407
1408 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1409 vm_swap_get_failures++;
1410 retval = 1;
1411 goto done;
1412 }
1413 swf->swp_io_count++;
1414
1415 lck_mtx_unlock(&vm_swap_data_lock);
1416
1417 #if DEVELOPMENT || DEBUG
1418 C_SEG_MAKE_WRITEABLE(c_seg);
1419 #endif
1420 file_offset = (f_offset & SWAP_SLOT_MASK);
1421 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1422
1423 #if DEVELOPMENT || DEBUG
1424 C_SEG_WRITE_PROTECT(c_seg);
1425 #endif
1426 if (retval == 0) {
1427 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
1428 } else {
1429 vm_swap_get_failures++;
1430 }
1431
1432 /*
1433 * Free this slot in the swap structure.
1434 */
1435 vm_swap_free(f_offset);
1436
1437 lck_mtx_lock(&vm_swap_data_lock);
1438 swf->swp_io_count--;
1439
1440 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1441 swf->swp_flags &= ~SWAP_WANTED;
1442 thread_wakeup((event_t) &swf->swp_flags);
1443 }
1444 done:
1445 lck_mtx_unlock(&vm_swap_data_lock);
1446
1447 if (retval == 0) {
1448 return KERN_SUCCESS;
1449 } else {
1450 return KERN_FAILURE;
1451 }
1452 }
1453
1454 kern_return_t
1455 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1456 {
1457 unsigned int segidx = 0;
1458 struct swapfile *swf = NULL;
1459 uint64_t file_offset = 0;
1460 uint64_t swapfile_index = 0;
1461 unsigned int byte_for_segidx = 0;
1462 unsigned int offset_within_byte = 0;
1463 boolean_t swf_eligible = FALSE;
1464 boolean_t waiting = FALSE;
1465 boolean_t retried = FALSE;
1466 int error = 0;
1467 clock_sec_t sec;
1468 clock_nsec_t nsec;
1469 void *upl_ctx = NULL;
1470
1471 if (addr == 0 || f_offset == NULL) {
1472 return KERN_FAILURE;
1473 }
1474 retry:
1475 lck_mtx_lock(&vm_swap_data_lock);
1476
1477 swf = (struct swapfile*) queue_first(&swf_global_queue);
1478
1479 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1480 segidx = swf->swp_free_hint;
1481
1482 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1483
1484 if (swf_eligible) {
1485 while (segidx < swf->swp_nsegs) {
1486 byte_for_segidx = segidx >> 3;
1487 offset_within_byte = segidx % 8;
1488
1489 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1490 segidx++;
1491 continue;
1492 }
1493
1494 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1495
1496 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1497 swf->swp_nseginuse++;
1498 swf->swp_io_count++;
1499 swf->swp_csegs[segidx] = c_seg;
1500
1501 swapfile_index = swf->swp_index;
1502 vm_swapfile_total_segs_used++;
1503
1504 clock_get_system_nanotime(&sec, &nsec);
1505
1506 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1507 thread_wakeup((event_t) &vm_swapfile_create_needed);
1508 }
1509
1510 lck_mtx_unlock(&vm_swap_data_lock);
1511
1512 goto issue_io;
1513 }
1514 }
1515 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1516 }
1517 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1518
1519 /*
1520 * we've run out of swap segments, but may not
1521 * be in a position to immediately create a new swap
1522 * file if we've recently failed to create due to a lack
1523 * of free space in the root filesystem... we'll try
1524 * to kick that create off, but in any event we're going
1525 * to take a breather (up to 1 second) so that we're not caught in a tight
1526 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1527 * segments into swap files only to have them immediately put back
1528 * on the c_age queue due to vm_swap_put failing.
1529 *
1530 * if we're doing these puts due to a hibernation flush,
1531 * no need to block... setting hibernate_no_swapspace to TRUE,
1532 * will cause "vm_compressor_compact_and_swap" to immediately abort
1533 */
1534 clock_get_system_nanotime(&sec, &nsec);
1535
1536 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1537 thread_wakeup((event_t) &vm_swapfile_create_needed);
1538 }
1539
1540 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1541 waiting = TRUE;
1542 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1543 } else {
1544 hibernate_no_swapspace = TRUE;
1545 }
1546
1547 lck_mtx_unlock(&vm_swap_data_lock);
1548
1549 if (waiting == TRUE) {
1550 thread_block(THREAD_CONTINUE_NULL);
1551
1552 if (retried == FALSE && hibernate_flushing == TRUE) {
1553 retried = TRUE;
1554 goto retry;
1555 }
1556 }
1557 vm_swap_put_failures_no_swap_file++;
1558
1559 return KERN_FAILURE;
1560
1561 issue_io:
1562 assert(c_seg->c_busy_swapping);
1563 assert(c_seg->c_busy);
1564 assert(!c_seg->c_on_minorcompact_q);
1565
1566 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1567
1568 if (soc) {
1569 soc->swp_c_seg = c_seg;
1570 soc->swp_c_size = size;
1571
1572 soc->swp_swf = swf;
1573
1574 soc->swp_io_error = 0;
1575 soc->swp_io_done = 0;
1576
1577 upl_ctx = (void *)&soc->swp_upl_ctx;
1578 }
1579 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1580
1581 if (error || upl_ctx == NULL) {
1582 return vm_swap_put_finish(swf, f_offset, error);
1583 }
1584
1585 return KERN_SUCCESS;
1586 }
1587
1588 kern_return_t
1589 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error)
1590 {
1591 lck_mtx_lock(&vm_swap_data_lock);
1592
1593 swf->swp_io_count--;
1594
1595 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1596 swf->swp_flags &= ~SWAP_WANTED;
1597 thread_wakeup((event_t) &swf->swp_flags);
1598 }
1599 lck_mtx_unlock(&vm_swap_data_lock);
1600
1601 if (error) {
1602 vm_swap_free(*f_offset);
1603 vm_swap_put_failures++;
1604
1605 return KERN_FAILURE;
1606 }
1607 return KERN_SUCCESS;
1608 }
1609
1610
1611 static void
1612 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1613 {
1614 uint64_t file_offset = 0;
1615 unsigned int segidx = 0;
1616
1617
1618 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1619 unsigned int byte_for_segidx = 0;
1620 unsigned int offset_within_byte = 0;
1621
1622 file_offset = (f_offset & SWAP_SLOT_MASK);
1623 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1624
1625 byte_for_segidx = segidx >> 3;
1626 offset_within_byte = segidx % 8;
1627
1628 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1629 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1630
1631 swf->swp_csegs[segidx] = NULL;
1632
1633 swf->swp_nseginuse--;
1634 vm_swapfile_total_segs_used--;
1635
1636 if (segidx < swf->swp_free_hint) {
1637 swf->swp_free_hint = segidx;
1638 }
1639 }
1640 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1641 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1642 }
1643 }
1644 }
1645
1646
/* Number of vm_swap_free calls that released the slot immediately. */
uint32_t vm_swap_free_now_count = 0;
/* Number of vm_swap_free calls that queued the slot on a delayed trim list. */
uint32_t vm_swap_free_delayed_count = 0;
1649
1650
1651 void
1652 vm_swap_free(uint64_t f_offset)
1653 {
1654 struct swapfile *swf = NULL;
1655 struct trim_list *tl = NULL;
1656 clock_sec_t sec;
1657 clock_nsec_t nsec;
1658
1659 if (swp_trim_supported == TRUE) {
1660 tl = kalloc(sizeof(struct trim_list));
1661 }
1662
1663 lck_mtx_lock(&vm_swap_data_lock);
1664
1665 swf = vm_swapfile_for_handle(f_offset);
1666
1667 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1668 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1669 /*
1670 * don't delay the free if the underlying disk doesn't support
1671 * trim, or we're in the midst of reclaiming this swap file since
1672 * we don't want to move segments that are technically free
1673 * but not yet handled by the delayed free mechanism
1674 */
1675 vm_swap_free_now(swf, f_offset);
1676
1677 vm_swap_free_now_count++;
1678 goto done;
1679 }
1680 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1681 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1682
1683 tl->tl_next = swf->swp_delayed_trim_list_head;
1684 swf->swp_delayed_trim_list_head = tl;
1685 swf->swp_delayed_trim_count++;
1686 tl = NULL;
1687
1688 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1689 clock_get_system_nanotime(&sec, &nsec);
1690
1691 if (sec > dont_trim_until_ts) {
1692 thread_wakeup((event_t) &vm_swapfile_create_needed);
1693 }
1694 }
1695 vm_swap_free_delayed_count++;
1696 }
1697 done:
1698 lck_mtx_unlock(&vm_swap_data_lock);
1699
1700 if (tl != NULL) {
1701 kfree(tl, sizeof(struct trim_list));
1702 }
1703 }
1704
1705
1706 static void
1707 vm_swap_wait_on_trim_handling_in_progress()
1708 {
1709 while (delayed_trim_handling_in_progress == TRUE) {
1710 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1711 lck_mtx_unlock(&vm_swap_data_lock);
1712
1713 thread_block(THREAD_CONTINUE_NULL);
1714
1715 lck_mtx_lock(&vm_swap_data_lock);
1716 }
1717 }
1718
1719
1720 static void
1721 vm_swap_handle_delayed_trims(boolean_t force_now)
1722 {
1723 struct swapfile *swf = NULL;
1724
1725 /*
1726 * serialize the race between us and vm_swap_reclaim...
1727 * if vm_swap_reclaim wins it will turn off SWAP_READY
1728 * on the victim it has chosen... we can just skip over
1729 * that file since vm_swap_reclaim will first process
1730 * all of the delayed trims associated with it
1731 */
1732 lck_mtx_lock(&vm_swap_data_lock);
1733
1734 delayed_trim_handling_in_progress = TRUE;
1735
1736 lck_mtx_unlock(&vm_swap_data_lock);
1737
1738 /*
1739 * no need to hold the lock to walk the swf list since
1740 * vm_swap_create (the only place where we add to this list)
1741 * is run on the same thread as this function
1742 * and vm_swap_reclaim doesn't remove items from this list
1743 * instead marking them with SWAP_REUSE for future re-use
1744 */
1745 swf = (struct swapfile*) queue_first(&swf_global_queue);
1746
1747 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1748 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1749 assert(!(swf->swp_flags & SWAP_RECLAIM));
1750 vm_swap_do_delayed_trim(swf);
1751 }
1752 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1753 }
1754 lck_mtx_lock(&vm_swap_data_lock);
1755
1756 delayed_trim_handling_in_progress = FALSE;
1757 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1758
1759 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1760 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1761 }
1762
1763 lck_mtx_unlock(&vm_swap_data_lock);
1764 }
1765
1766 static void
1767 vm_swap_do_delayed_trim(struct swapfile *swf)
1768 {
1769 struct trim_list *tl, *tl_head;
1770
1771 lck_mtx_lock(&vm_swap_data_lock);
1772
1773 tl_head = swf->swp_delayed_trim_list_head;
1774 swf->swp_delayed_trim_list_head = NULL;
1775 swf->swp_delayed_trim_count = 0;
1776
1777 lck_mtx_unlock(&vm_swap_data_lock);
1778
1779 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1780
1781 while ((tl = tl_head) != NULL) {
1782 unsigned int segidx = 0;
1783 unsigned int byte_for_segidx = 0;
1784 unsigned int offset_within_byte = 0;
1785
1786 lck_mtx_lock(&vm_swap_data_lock);
1787
1788 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1789
1790 byte_for_segidx = segidx >> 3;
1791 offset_within_byte = segidx % 8;
1792
1793 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1794 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1795
1796 swf->swp_csegs[segidx] = NULL;
1797
1798 swf->swp_nseginuse--;
1799 vm_swapfile_total_segs_used--;
1800
1801 if (segidx < swf->swp_free_hint) {
1802 swf->swp_free_hint = segidx;
1803 }
1804 }
1805 lck_mtx_unlock(&vm_swap_data_lock);
1806
1807 tl_head = tl->tl_next;
1808
1809 kfree(tl, sizeof(struct trim_list));
1810 }
1811 }
1812
1813
/*
 * Placeholder: there is nothing to flush, so this does no work.
 */
void
vm_swap_flush()
{
	/* intentionally empty */
}
1819
/* Number of times vm_swap_reclaim stopped its segment walk early. */
int vm_swap_reclaim_yielded = 0;
1821
1822 void
1823 vm_swap_reclaim(void)
1824 {
1825 vm_offset_t addr = 0;
1826 unsigned int segidx = 0;
1827 uint64_t f_offset = 0;
1828 struct swapfile *swf = NULL;
1829 struct swapfile *smallest_swf = NULL;
1830 unsigned int min_nsegs = 0;
1831 unsigned int byte_for_segidx = 0;
1832 unsigned int offset_within_byte = 0;
1833 uint32_t c_size = 0;
1834
1835 c_segment_t c_seg = NULL;
1836
1837 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
1838 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1839 }
1840
1841 lck_mtx_lock(&vm_swap_data_lock);
1842
1843 /*
1844 * if we're running the swapfile list looking for
1845 * candidates with delayed trims, we need to
1846 * wait before making our decision concerning
1847 * the swapfile we want to reclaim
1848 */
1849 vm_swap_wait_on_trim_handling_in_progress();
1850
1851 /*
1852 * from here until we knock down the SWAP_READY bit,
1853 * we need to remain behind the vm_swap_data_lock...
1854 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1855 * will not consider this swapfile for processing
1856 */
1857 swf = (struct swapfile*) queue_first(&swf_global_queue);
1858 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1859 smallest_swf = NULL;
1860
1861 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1862 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1863 smallest_swf = swf;
1864 min_nsegs = swf->swp_nseginuse;
1865 }
1866 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1867 }
1868
1869 if (smallest_swf == NULL) {
1870 goto done;
1871 }
1872
1873 swf = smallest_swf;
1874
1875
1876 swf->swp_flags &= ~SWAP_READY;
1877 swf->swp_flags |= SWAP_RECLAIM;
1878
1879 if (swf->swp_delayed_trim_count) {
1880 lck_mtx_unlock(&vm_swap_data_lock);
1881
1882 vm_swap_do_delayed_trim(swf);
1883
1884 lck_mtx_lock(&vm_swap_data_lock);
1885 }
1886 segidx = 0;
1887
1888 while (segidx < swf->swp_nsegs) {
1889 ReTry_for_cseg:
1890 /*
1891 * Wait for outgoing I/Os.
1892 */
1893 while (swf->swp_io_count) {
1894 swf->swp_flags |= SWAP_WANTED;
1895
1896 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1897 lck_mtx_unlock(&vm_swap_data_lock);
1898
1899 thread_block(THREAD_CONTINUE_NULL);
1900
1901 lck_mtx_lock(&vm_swap_data_lock);
1902 }
1903 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1904 vm_swap_reclaim_yielded++;
1905 break;
1906 }
1907
1908 byte_for_segidx = segidx >> 3;
1909 offset_within_byte = segidx % 8;
1910
1911 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
1912 segidx++;
1913 continue;
1914 }
1915
1916 c_seg = swf->swp_csegs[segidx];
1917 assert(c_seg);
1918
1919 lck_mtx_lock_spin_always(&c_seg->c_lock);
1920
1921 if (c_seg->c_busy) {
1922 /*
1923 * a swapped out c_segment in the process of being freed will remain in the
1924 * busy state until after the vm_swap_free is called on it... vm_swap_free
1925 * takes the vm_swap_data_lock, so can't change the swap state until after
1926 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
1927 * which will allow c_seg_free_locked to clear busy and wake up this thread...
1928 * at that point, we re-look up the swap state which will now indicate that
1929 * this c_segment no longer exists.
1930 */
1931 c_seg->c_wanted = 1;
1932
1933 assert_wait((event_t) (c_seg), THREAD_UNINT);
1934 lck_mtx_unlock_always(&c_seg->c_lock);
1935
1936 lck_mtx_unlock(&vm_swap_data_lock);
1937
1938 thread_block(THREAD_CONTINUE_NULL);
1939
1940 lck_mtx_lock(&vm_swap_data_lock);
1941
1942 goto ReTry_for_cseg;
1943 }
1944 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1945
1946 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1947
1948 assert(c_seg == swf->swp_csegs[segidx]);
1949 swf->swp_csegs[segidx] = NULL;
1950 swf->swp_nseginuse--;
1951
1952 vm_swapfile_total_segs_used--;
1953
1954 lck_mtx_unlock(&vm_swap_data_lock);
1955
1956 assert(C_SEG_IS_ONDISK(c_seg));
1957
1958 C_SEG_BUSY(c_seg);
1959 c_seg->c_busy_swapping = 1;
1960 #if !CHECKSUM_THE_SWAP
1961 c_seg_trim_tail(c_seg);
1962 #endif
1963 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1964
1965 assert(c_size <= C_SEG_BUFSIZE && c_size);
1966
1967 lck_mtx_unlock_always(&c_seg->c_lock);
1968
1969 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
1970 /*
1971 * reading the data back in failed, so convert c_seg
1972 * to a swapped in c_segment that contains no data
1973 */
1974 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
1975 /*
1976 * returns with c_busy_swapping cleared
1977 */
1978
1979 vm_swap_get_failures++;
1980 goto swap_io_failed;
1981 }
1982 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
1983
1984 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
1985 vm_offset_t c_buffer;
1986
1987 /*
1988 * the put failed, so convert c_seg to a fully swapped in c_segment
1989 * with valid data
1990 */
1991 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
1992
1993 kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1994
1995 memcpy((char *)c_buffer, (char *)addr, c_size);
1996
1997 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
1998 #if ENCRYPTED_SWAP
1999 vm_swap_decrypt(c_seg);
2000 #endif /* ENCRYPTED_SWAP */
2001 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2002 /*
2003 * returns with c_busy_swapping cleared
2004 */
2005 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2006
2007 goto swap_io_failed;
2008 }
2009 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
2010
2011 lck_mtx_lock_spin_always(&c_seg->c_lock);
2012
2013 assert(C_SEG_IS_ONDISK(c_seg));
2014 /*
2015 * The c_seg will now know about the new location on disk.
2016 */
2017 c_seg->c_store.c_swap_handle = f_offset;
2018
2019 assert(c_seg->c_busy_swapping);
2020 c_seg->c_busy_swapping = 0;
2021 swap_io_failed:
2022 assert(c_seg->c_busy);
2023 C_SEG_WAKEUP_DONE(c_seg);
2024
2025 lck_mtx_unlock_always(&c_seg->c_lock);
2026 lck_mtx_lock(&vm_swap_data_lock);
2027 }
2028
2029 if (swf->swp_nseginuse) {
2030 swf->swp_flags &= ~SWAP_RECLAIM;
2031 swf->swp_flags |= SWAP_READY;
2032
2033 goto done;
2034 }
2035 /*
2036 * We don't remove this inactive swf from the queue.
2037 * That way, we can re-use it when needed again and
2038 * preserve the namespace. The delayed_trim processing
2039 * is also dependent on us not removing swfs from the queue.
2040 */
2041 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2042
2043 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2044
2045 lck_mtx_unlock(&vm_swap_data_lock);
2046
2047 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2048
2049 kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
2050 kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2051
2052 lck_mtx_lock(&vm_swap_data_lock);
2053
2054 if (swf->swp_flags & SWAP_PINNED) {
2055 vm_num_pinned_swap_files--;
2056 vm_swappin_avail += swf->swp_size;
2057 }
2058
2059 swf->swp_vp = NULL;
2060 swf->swp_size = 0;
2061 swf->swp_free_hint = 0;
2062 swf->swp_nsegs = 0;
2063 swf->swp_flags = SWAP_REUSE;
2064
2065 vm_num_swap_files--;
2066
2067 done:
2068 thread_wakeup((event_t) &swf->swp_flags);
2069 lck_mtx_unlock(&vm_swap_data_lock);
2070
2071 kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
2072 }
2073
2074
2075 uint64_t
2076 vm_swap_get_total_space(void)
2077 {
2078 uint64_t total_space = 0;
2079
2080 total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
2081
2082 return total_space;
2083 }
2084
2085 uint64_t
2086 vm_swap_get_used_space(void)
2087 {
2088 uint64_t used_space = 0;
2089
2090 used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
2091
2092 return used_space;
2093 }
2094
2095 uint64_t
2096 vm_swap_get_free_space(void)
2097 {
2098 return vm_swap_get_total_space() - vm_swap_get_used_space();
2099 }
2100
2101
2102 int
2103 vm_swap_low_on_space(void)
2104 {
2105 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2106 return 0;
2107 }
2108
2109 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
2110 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2111 return 0;
2112 }
2113
2114 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2115 return 1;
2116 }
2117 }
2118 return 0;
2119 }
2120
2121 boolean_t
2122 vm_swap_files_pinned(void)
2123 {
2124 boolean_t result;
2125
2126 if (vm_swappin_enabled == FALSE) {
2127 return TRUE;
2128 }
2129
2130 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2131
2132 return result;
2133 }
2134
#if CONFIG_FREEZE
/*
 * Ask the swap volume for the freezer's daily budget.
 *
 * freeze_daily_budget  out parameter handed to vm_swap_vol_get_budget.
 *
 * With swap files present, the first one whose flags are exactly SWAP_READY
 * is queried. With none, the swap file path is opened temporarily just for
 * the query; if that open fails *freeze_daily_budget is set to 0.
 * Returns TRUE only when vm_swap_vol_get_budget reported success (0).
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t got_device_value = FALSE;
	struct swapfile *cur = NULL;
	struct vnode *probe_vp = NULL;

	if (vm_num_swap_files) {
		lck_mtx_lock(&vm_swap_data_lock);

		for (cur = (struct swapfile*) queue_first(&swf_global_queue);
		    cur && !queue_end(&swf_global_queue, (queue_entry_t)cur);
		    cur = (struct swapfile*) queue_next(&cur->swp_queue)) {
			if (cur->swp_flags != SWAP_READY) {
				continue;
			}
			assert(cur->swp_vp);

			if (vm_swap_vol_get_budget(cur->swp_vp, freeze_daily_budget) == 0) {
				got_device_value = TRUE;
			}
			/* only the first ready file is consulted */
			break;
		}

		lck_mtx_unlock(&vm_swap_data_lock);

		return got_device_value;
	}

	/*
	 * No swap file exists yet, so this provides the initial budget value:
	 * open the swap file path temporarily just to query the volume.
	 */
	vm_swapfile_open(swapfilename, &probe_vp);

	if (!probe_vp) {
		*freeze_daily_budget = 0;

		return got_device_value;
	}

	if (vm_swap_vol_get_budget(probe_vp, freeze_daily_budget) == 0) {
		got_device_value = TRUE;
	}

	vm_swapfile_close((uint64_t)&swapfilename, probe_vp);
	probe_vp = NULL;

	return got_device_value;
}
#endif /* CONFIG_FREEZE */