]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_compressor_backing_store.c
xnu-3247.10.11.tar.gz
[apple/xnu.git] / osfmk / vm / vm_compressor_backing_store.c
CommitLineData
39236c6e
A
1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include "vm_compressor_backing_store.h"
30#include <vm/vm_protos.h>
31
32#include <IOKit/IOHibernatePrivate.h>
33
34
35boolean_t compressor_store_stop_compaction = FALSE;
36boolean_t vm_swap_up = FALSE;
fe8ab488
A
37boolean_t vm_swapfile_create_needed = FALSE;
38boolean_t vm_swapfile_gc_needed = FALSE;
39236c6e
A
39
40int swapper_throttle = -1;
41boolean_t swapper_throttle_inited = FALSE;
42uint64_t vm_swapout_thread_id;
43
44uint64_t vm_swap_put_failures = 0;
45uint64_t vm_swap_get_failures = 0;
46int vm_num_swap_files = 0;
47int vm_swapout_thread_processed_segments = 0;
48int vm_swapout_thread_awakened = 0;
fe8ab488
A
49int vm_swapfile_create_thread_awakened = 0;
50int vm_swapfile_create_thread_running = 0;
51int vm_swapfile_gc_thread_awakened = 0;
52int vm_swapfile_gc_thread_running = 0;
39236c6e 53
3e170ce0 54int64_t vm_swappin_avail = 0;
39236c6e
A
55unsigned int vm_swapfile_total_segs_alloced = 0;
56unsigned int vm_swapfile_total_segs_used = 0;
57
58
59#define SWAP_READY 0x1 /* Swap file is ready to be used */
60#define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
61#define SWAP_WANTED 0x4 /* Swap file has waiters */
62#define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
3e170ce0
A
63#define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
64
39236c6e
A
65
66struct swapfile{
67 queue_head_t swp_queue; /* list of swap files */
68 char *swp_path; /* saved pathname of swap file */
69 struct vnode *swp_vp; /* backing vnode */
70 uint64_t swp_size; /* size of this swap file */
71 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
72 unsigned int swp_pathlen; /* length of pathname */
73 unsigned int swp_nsegs; /* #segments we can use */
74 unsigned int swp_nseginuse; /* #segments in use */
75 unsigned int swp_index; /* index of this swap file */
76 unsigned int swp_flags; /* state of swap file */
77 unsigned int swp_free_hint; /* offset of 1st free chunk */
78 unsigned int swp_io_count; /* count of outstanding I/Os */
79 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
80
81 struct trim_list *swp_delayed_trim_list_head;
82 unsigned int swp_delayed_trim_count;
39236c6e
A
83};
84
85queue_head_t swf_global_queue;
fe8ab488 86boolean_t swp_trim_supported = FALSE;
39236c6e 87
39236c6e
A
88extern clock_sec_t dont_trim_until_ts;
89clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
fe8ab488
A
90clock_sec_t vm_swapfile_last_successful_create_ts = 0;
91int vm_swapfile_can_be_created = FALSE;
92boolean_t delayed_trim_handling_in_progress = FALSE;
39236c6e
A
93
94static void vm_swapout_thread_throttle_adjust(void);
95static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
96static void vm_swapout_thread(void);
fe8ab488
A
97static void vm_swapfile_create_thread(void);
98static void vm_swapfile_gc_thread(void);
39236c6e
A
99static void vm_swap_defragment();
100static void vm_swap_handle_delayed_trims(boolean_t);
101static void vm_swap_do_delayed_trim();
fe8ab488
A
102static void vm_swap_wait_on_trim_handling_in_progress(void);
103
39236c6e
A
104
105
3e170ce0
A
/* Upper bound on the number of swap files and on the per-file batch of delayed trims. */
#define VM_MAX_SWAP_FILE_NUM		100
#define VM_SWAPFILE_DELAYED_TRIM_MAX	128

/* Minimum number of seconds between a failed swap file creation and the next attempt. */
#define VM_SWAPFILE_DELAYED_CREATE	15

/* Defragment when more than a quarter of the used swap segments are on the sparse list. */
#define VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)

/* Reclaim when the number of allocated-but-unused segments reaches the reclaim threshold... */
#define VM_SWAP_SHOULD_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)

/* ...and abort a reclaim once it has dropped to the minimum. */
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)

/* Pin a new swap file only if the remaining pinnable space covers its full size. */
#define VM_SWAP_SHOULD_PIN(_size)	(vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))

/*
 * Create another swap file when we are below the file-count limit, the free
 * segment count is under the high-water mark, and enough time has passed
 * since the last failed creation.  'cur_ts' is the current time in seconds.
 */
#define VM_SWAP_SHOULD_CREATE(cur_ts)	(((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && \
					  ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					  (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE)) ? 1 : 0)

/* A swap file's delayed trims are issued once its pending count reaches the batch limit. */
#define VM_SWAP_SHOULD_TRIM(swf)	((((swf)->swp_delayed_trim_count) >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

/* Swap is "busy" while there are segments queued for swapout and the swapper runs at tier 0 or 1. */
#define VM_SWAP_BUSY()	((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
121
122
#if CHECKSUM_THE_SWAP
/* Used to checksum a segment's buffer before it is written out. */
extern unsigned int hash_string(char *cp, int len);
#endif

#if RECORD_THE_COMPRESSED_DATA
/* State for the optional recording of compressed data to a file. */
boolean_t	c_compressed_record_init_done = FALSE;
int		c_compressed_record_write_error = 0;
struct vnode	*c_compressed_record_vp = NULL;
uint64_t	c_compressed_record_file_offset = 0;
void	c_compressed_record_init(void);
void	c_compressed_record_write(char *, int);
#endif

#if ENCRYPTED_SWAP
/* AES context and counters used by vm_swap_encrypt() / vm_swap_decrypt(). */
extern boolean_t		swap_crypt_ctx_initialized;
extern void			swap_crypt_ctx_initialize(void);
extern const unsigned char	swap_crypt_null_iv[AES_BLOCK_SIZE];
extern aes_ctx			swap_crypt_ctx;
extern unsigned long		vm_page_encrypt_counter;
extern unsigned long		vm_page_decrypt_counter;
#endif /* ENCRYPTED_SWAP */

extern void vm_pageout_io_throttle(void);
extern void vm_pageout_reinit_tuneables(void);
extern void vm_swap_file_set_tuneables(void);

struct swapfile *vm_swapfile_for_handle(uint64_t);
150
151/*
152 * Called with the vm_swap_data_lock held.
153 */
154
155struct swapfile *
156vm_swapfile_for_handle(uint64_t f_offset)
157{
158
159 uint64_t file_offset = 0;
160 unsigned int swapfile_index = 0;
161 struct swapfile* swf = NULL;
162
163 file_offset = (f_offset & SWAP_SLOT_MASK);
164 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
165
166 swf = (struct swapfile*) queue_first(&swf_global_queue);
167
168 while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
169
170 if (swapfile_index == swf->swp_index) {
171 break;
172 }
173
174 swf = (struct swapfile*) queue_next(&swf->swp_queue);
175 }
176
177 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
178 swf = NULL;
179 }
180
181 return swf;
182}
183
184void
fe8ab488 185vm_compressor_swap_init()
39236c6e 186{
39236c6e
A
187 thread_t thread = NULL;
188
39236c6e
A
189 lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
190 lck_grp_init(&vm_swap_data_lock_grp,
191 "vm_swap_data",
192 &vm_swap_data_lock_grp_attr);
193 lck_attr_setdefault(&vm_swap_data_lock_attr);
194 lck_mtx_init_ext(&vm_swap_data_lock,
195 &vm_swap_data_lock_ext,
196 &vm_swap_data_lock_grp,
197 &vm_swap_data_lock_attr);
198
199 queue_init(&swf_global_queue);
200
39236c6e 201
fe8ab488 202 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
39236c6e 203 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
fe8ab488
A
204 panic("vm_swapout_thread: create failed");
205 }
fe8ab488 206 vm_swapout_thread_id = thread->thread_id;
39236c6e 207
fe8ab488 208 thread_deallocate(thread);
39236c6e 209
fe8ab488
A
210 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
211 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
212 panic("vm_swapfile_create_thread: create failed");
213 }
39236c6e 214
fe8ab488 215 thread_deallocate(thread);
39236c6e 216
fe8ab488
A
217 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
218 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
219 panic("vm_swapfile_gc_thread: create failed");
220 }
221 thread_deallocate(thread);
39236c6e 222
fe8ab488
A
223 proc_set_task_policy_thread(kernel_task, thread->thread_id,
224 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
225 proc_set_task_policy_thread(kernel_task, thread->thread_id,
226 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
39236c6e 227
fe8ab488
A
228#if ENCRYPTED_SWAP
229 if (swap_crypt_ctx_initialized == FALSE) {
230 swap_crypt_ctx_initialize();
231 }
232#endif /* ENCRYPTED_SWAP */
233
234 memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);
39236c6e 235
fe8ab488 236 vm_swap_up = TRUE;
39236c6e 237
fe8ab488
A
238 printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF");
239}
39236c6e 240
39236c6e 241
3e170ce0
A
242#if RECORD_THE_COMPRESSED_DATA
243
244void
245c_compressed_record_init()
246{
247 if (c_compressed_record_init_done == FALSE) {
248 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
249 c_compressed_record_init_done = TRUE;
250 }
251}
252
253void
254c_compressed_record_write(char *buf, int size)
255{
256 if (c_compressed_record_write_error == 0) {
257 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
258 c_compressed_record_file_offset += size;
259 }
260}
261#endif
262
263
264
fe8ab488
A
265void
266vm_swap_file_set_tuneables()
267{
268 struct vnode *vp;
269 char *pathname;
270 int namelen;
39236c6e 271
fe8ab488
A
272 if (strlen(swapfilename) == 0) {
273 /*
274 * If no swapfile name has been set, we'll
275 * use the default name.
276 *
277 * Also, this function is only called from the vm_pageout_scan thread
278 * via vm_consider_waking_compactor_swapper,
279 * so we don't need to worry about a race in checking/setting the name here.
280 */
281 strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
282 }
283 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
284 pathname = (char*)kalloc(namelen);
285 memset(pathname, 0, namelen);
286 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
39236c6e 287
fe8ab488 288 vm_swapfile_open(pathname, &vp);
39236c6e 289
fe8ab488
A
290 if (vp == NULL)
291 goto done;
39236c6e 292
fe8ab488
A
293 if (vnode_pager_isSSD(vp) == FALSE)
294 vm_pageout_reinit_tuneables();
295 vnode_setswapmount(vp);
3e170ce0 296 vm_swappin_avail = vnode_getswappin_avail(vp);
fe8ab488 297 vm_swapfile_close((uint64_t)pathname, vp);
39236c6e 298done:
fe8ab488 299 kfree(pathname, namelen);
39236c6e
A
300}
301
fe8ab488
A
302
303#if ENCRYPTED_SWAP
39236c6e
A
304void
305vm_swap_encrypt(c_segment_t c_seg)
306{
307 vm_offset_t kernel_vaddr = 0;
308 uint64_t size = 0;
309
310 union {
311 unsigned char aes_iv[AES_BLOCK_SIZE];
312 void *c_seg;
313 } encrypt_iv;
314
315 assert(swap_crypt_ctx_initialized);
316
317 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
318
319 encrypt_iv.c_seg = (void*)c_seg;
320
321 /* encrypt the "initial vector" */
322 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
323 swap_crypt_null_iv,
324 1,
325 &encrypt_iv.aes_iv[0],
326 &swap_crypt_ctx.encrypt);
327
328 kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
329 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
330
331 /*
332 * Encrypt the c_segment.
333 */
334 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
335 &encrypt_iv.aes_iv[0],
336 (unsigned int)(size / AES_BLOCK_SIZE),
337 (unsigned char *) kernel_vaddr,
338 &swap_crypt_ctx.encrypt);
339
340 vm_page_encrypt_counter += (size/PAGE_SIZE_64);
341}
342
343void
344vm_swap_decrypt(c_segment_t c_seg)
345{
346
347 vm_offset_t kernel_vaddr = 0;
348 uint64_t size = 0;
349
350 union {
351 unsigned char aes_iv[AES_BLOCK_SIZE];
352 void *c_seg;
353 } decrypt_iv;
354
355
356 assert(swap_crypt_ctx_initialized);
357
358 /*
359 * Prepare an "initial vector" for the decryption.
360 * It has to be the same as the "initial vector" we
361 * used to encrypt that page.
362 */
363 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
364
365 decrypt_iv.c_seg = (void*)c_seg;
366
367 /* encrypt the "initial vector" */
368 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
369 swap_crypt_null_iv,
370 1,
371 &decrypt_iv.aes_iv[0],
372 &swap_crypt_ctx.encrypt);
373
374 kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
375 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
376
377 /*
378 * Decrypt the c_segment.
379 */
380 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
381 &decrypt_iv.aes_iv[0],
382 (unsigned int) (size / AES_BLOCK_SIZE),
383 (unsigned char *) kernel_vaddr,
384 &swap_crypt_ctx.decrypt);
385
386 vm_page_decrypt_counter += (size/PAGE_SIZE_64);
387}
fe8ab488 388#endif /* ENCRYPTED_SWAP */
39236c6e
A
389
390
391void
392vm_swap_consider_defragmenting()
393{
fe8ab488
A
394 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
395 (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
39236c6e 396
fe8ab488 397 if (!vm_swapfile_gc_thread_running) {
39236c6e
A
398 lck_mtx_lock(&vm_swap_data_lock);
399
fe8ab488
A
400 if (!vm_swapfile_gc_thread_running)
401 thread_wakeup((event_t) &vm_swapfile_gc_needed);
39236c6e
A
402
403 lck_mtx_unlock(&vm_swap_data_lock);
404 }
405 }
406}
407
408
409int vm_swap_defragment_yielded = 0;
410int vm_swap_defragment_swapin = 0;
411int vm_swap_defragment_free = 0;
412int vm_swap_defragment_busy = 0;
413
414
415static void
416vm_swap_defragment()
417{
418 c_segment_t c_seg;
419
420 /*
421 * have to grab the master lock w/o holding
422 * any locks in spin mode
423 */
424 PAGE_REPLACEMENT_DISALLOWED(TRUE);
425
426 lck_mtx_lock_spin_always(c_list_lock);
427
428 while (!queue_empty(&c_swappedout_sparse_list_head)) {
429
430 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
431 vm_swap_defragment_yielded++;
432 break;
433 }
434 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
435
436 lck_mtx_lock_spin_always(&c_seg->c_lock);
437
3e170ce0 438 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
39236c6e
A
439
440 if (c_seg->c_busy) {
441 lck_mtx_unlock_always(c_list_lock);
442
443 PAGE_REPLACEMENT_DISALLOWED(FALSE);
444 /*
445 * c_seg_wait_on_busy consumes c_seg->c_lock
446 */
447 c_seg_wait_on_busy(c_seg);
448
449 PAGE_REPLACEMENT_DISALLOWED(TRUE);
450
451 lck_mtx_lock_spin_always(c_list_lock);
452
453 vm_swap_defragment_busy++;
454 continue;
455 }
456 if (c_seg->c_bytes_used == 0) {
457 /*
458 * c_seg_free_locked consumes the c_list_lock
459 * and c_seg->c_lock
460 */
3e170ce0 461 C_SEG_BUSY(c_seg);
39236c6e
A
462 c_seg_free_locked(c_seg);
463
464 vm_swap_defragment_free++;
465 } else {
466 lck_mtx_unlock_always(c_list_lock);
467
468 c_seg_swapin(c_seg, TRUE);
469 lck_mtx_unlock_always(&c_seg->c_lock);
470
471 vm_swap_defragment_swapin++;
472 }
473 PAGE_REPLACEMENT_DISALLOWED(FALSE);
474
475 vm_pageout_io_throttle();
476
477 /*
478 * because write waiters have privilege over readers,
479 * dropping and immediately retaking the master lock will
480 * still allow any thread waiting to acquire the
481 * master lock exclusively an opportunity to take it
482 */
483 PAGE_REPLACEMENT_DISALLOWED(TRUE);
484
485 lck_mtx_lock_spin_always(c_list_lock);
486 }
487 lck_mtx_unlock_always(c_list_lock);
488
489 PAGE_REPLACEMENT_DISALLOWED(FALSE);
490}
491
492
493
494static void
fe8ab488 495vm_swapfile_create_thread(void)
39236c6e 496{
39236c6e
A
497 clock_sec_t sec;
498 clock_nsec_t nsec;
499
3e170ce0
A
500 current_thread()->options |= TH_OPT_VMPRIV;
501
fe8ab488
A
502 vm_swapfile_create_thread_awakened++;
503 vm_swapfile_create_thread_running = 1;
39236c6e 504
fe8ab488 505 while (TRUE) {
39236c6e
A
506 /*
507 * walk through the list of swap files
508 * and do the delayed frees/trims for
509 * any swap file whose count of delayed
510 * frees is above the batch limit
511 */
512 vm_swap_handle_delayed_trims(FALSE);
513
fe8ab488 514 lck_mtx_lock(&vm_swap_data_lock);
39236c6e 515
fe8ab488 516 clock_get_system_nanotime(&sec, &nsec);
39236c6e 517
fe8ab488
A
518 if (VM_SWAP_SHOULD_CREATE(sec) == 0)
519 break;
39236c6e 520
fe8ab488 521 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 522
fe8ab488
A
523 if (vm_swap_create_file() == FALSE) {
524 vm_swapfile_last_failed_to_create_ts = sec;
525 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
39236c6e 526
fe8ab488
A
527 } else
528 vm_swapfile_last_successful_create_ts = sec;
529 }
530 vm_swapfile_create_thread_running = 0;
39236c6e 531
fe8ab488 532 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
39236c6e 533
fe8ab488 534 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 535
fe8ab488
A
536 thread_block((thread_continue_t)vm_swapfile_create_thread);
537
538 /* NOTREACHED */
539}
39236c6e 540
39236c6e 541
fe8ab488
A
542static void
543vm_swapfile_gc_thread(void)
544{
545 boolean_t need_defragment;
546 boolean_t need_reclaim;
547
548 vm_swapfile_gc_thread_awakened++;
549 vm_swapfile_gc_thread_running = 1;
550
551 while (TRUE) {
552
553 lck_mtx_lock(&vm_swap_data_lock);
554
555 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
556 break;
557
558 need_defragment = FALSE;
559 need_reclaim = FALSE;
560
561 if (VM_SWAP_SHOULD_DEFRAGMENT())
562 need_defragment = TRUE;
563
564 if (VM_SWAP_SHOULD_RECLAIM()) {
565 need_defragment = TRUE;
566 need_reclaim = TRUE;
567 }
568 if (need_defragment == FALSE && need_reclaim == FALSE)
569 break;
570
39236c6e 571 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 572
fe8ab488
A
573 if (need_defragment == TRUE)
574 vm_swap_defragment();
575 if (need_reclaim == TRUE)
576 vm_swap_reclaim();
577 }
578 vm_swapfile_gc_thread_running = 0;
39236c6e 579
fe8ab488 580 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
39236c6e
A
581
582 lck_mtx_unlock(&vm_swap_data_lock);
583
fe8ab488 584 thread_block((thread_continue_t)vm_swapfile_gc_thread);
39236c6e
A
585
586 /* NOTREACHED */
587}
588
589
590
591int swapper_entered_T0 = 0;
592int swapper_entered_T1 = 0;
593int swapper_entered_T2 = 0;
594
595static void
596vm_swapout_thread_throttle_adjust(void)
597{
598 int swapper_throttle_new;
599
600 if (swapper_throttle_inited == FALSE) {
601 /*
602 * force this thread to be set to the correct
603 * throttling tier
604 */
605 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
606 swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
607 swapper_throttle_inited = TRUE;
608 swapper_entered_T2++;
609 goto done;
610 }
611 swapper_throttle_new = swapper_throttle;
612
613
614 switch(swapper_throttle) {
615
616 case THROTTLE_LEVEL_COMPRESSOR_TIER2:
617
618 if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
619 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
620 swapper_entered_T1++;
621 break;
622 }
623 break;
624
625 case THROTTLE_LEVEL_COMPRESSOR_TIER1:
626
627 if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
628 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
629 swapper_entered_T0++;
630 break;
631 }
632 if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
633 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
634 swapper_entered_T2++;
635 break;
636 }
637 break;
638
639 case THROTTLE_LEVEL_COMPRESSOR_TIER0:
640
641 if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
642 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
643 swapper_entered_T2++;
644 break;
645 }
646 if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
647 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
648 swapper_entered_T1++;
649 break;
650 }
651 break;
652 }
653done:
654 if (swapper_throttle != swapper_throttle_new) {
655 proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
656 TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
657 proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
658 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
659
660 swapper_throttle = swapper_throttle_new;
661 }
662}
663
664
3e170ce0
A
665int vm_swapout_found_empty = 0;
666
667
39236c6e
A
668static void
669vm_swapout_thread(void)
670{
671 uint64_t f_offset = 0;
672 uint32_t size = 0;
673 c_segment_t c_seg = NULL;
674 kern_return_t kr = KERN_SUCCESS;
675 vm_offset_t addr = 0;
676
3e170ce0
A
677 current_thread()->options |= TH_OPT_VMPRIV;
678
39236c6e
A
679 vm_swapout_thread_awakened++;
680
681 lck_mtx_lock_spin_always(c_list_lock);
682
683 while (!queue_empty(&c_swapout_list_head)) {
684
685 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
686
687 lck_mtx_lock_spin_always(&c_seg->c_lock);
688
3e170ce0 689 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
39236c6e
A
690
691 if (c_seg->c_busy) {
39236c6e
A
692 lck_mtx_unlock_always(c_list_lock);
693
8a3053a0 694 c_seg_wait_on_busy(c_seg);
39236c6e
A
695
696 lck_mtx_lock_spin_always(c_list_lock);
697
698 continue;
699 }
39236c6e
A
700 vm_swapout_thread_processed_segments++;
701
8a3053a0
A
702 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
703
704 if (size == 0) {
3e170ce0
A
705 assert(c_seg->c_on_minorcompact_q);
706 assert(c_seg->c_bytes_used == 0);
707
708 c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
709 lck_mtx_unlock_always(&c_seg->c_lock);
710 lck_mtx_unlock_always(c_list_lock);
711
712 vm_swapout_found_empty++;
713 goto c_seg_is_empty;
8a3053a0 714 }
fe8ab488 715 C_SEG_BUSY(c_seg);
8a3053a0
A
716 c_seg->c_busy_swapping = 1;
717
39236c6e
A
718 lck_mtx_unlock_always(c_list_lock);
719
720 addr = (vm_offset_t) c_seg->c_store.c_buffer;
721
39236c6e
A
722 lck_mtx_unlock_always(&c_seg->c_lock);
723
724#if CHECKSUM_THE_SWAP
725 c_seg->cseg_hash = hash_string((char*)addr, (int)size);
726 c_seg->cseg_swap_size = size;
727#endif /* CHECKSUM_THE_SWAP */
728
fe8ab488 729#if ENCRYPTED_SWAP
39236c6e 730 vm_swap_encrypt(c_seg);
fe8ab488 731#endif /* ENCRYPTED_SWAP */
39236c6e
A
732
733 vm_swapout_thread_throttle_adjust();
734
735 kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);
736
737 PAGE_REPLACEMENT_DISALLOWED(TRUE);
738
3e170ce0
A
739 if (kr == KERN_SUCCESS) {
740 kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
741 }
39236c6e
A
742 lck_mtx_lock_spin_always(c_list_lock);
743 lck_mtx_lock_spin_always(&c_seg->c_lock);
744
745 if (kr == KERN_SUCCESS) {
3e170ce0
A
746 int new_state = C_ON_SWAPPEDOUT_Q;
747 boolean_t insert_head = FALSE;
39236c6e 748
3e170ce0
A
749 if (hibernate_flushing == TRUE) {
750 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
751 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
752 insert_head = TRUE;
753 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
754 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
39236c6e 755
3e170ce0 756 c_seg_switch_state(c_seg, new_state, insert_head);
39236c6e 757
39236c6e 758 c_seg->c_store.c_swap_handle = f_offset;
39236c6e
A
759
760 VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
761
762 if (c_seg->c_bytes_used)
763 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
764 } else {
fe8ab488 765#if ENCRYPTED_SWAP
39236c6e 766 vm_swap_decrypt(c_seg);
fe8ab488 767#endif /* ENCRYPTED_SWAP */
3e170ce0
A
768 if (c_seg->c_overage_swap == TRUE) {
769 c_seg->c_overage_swap = FALSE;
770 c_overage_swapped_count--;
771 }
772 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
39236c6e
A
773 }
774 lck_mtx_unlock_always(c_list_lock);
775
3e170ce0
A
776 c_seg->c_busy_swapping = 0;
777 C_SEG_WAKEUP_DONE(c_seg);
778 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e
A
779
780 PAGE_REPLACEMENT_DISALLOWED(FALSE);
781
39236c6e 782 vm_pageout_io_throttle();
3e170ce0 783c_seg_is_empty:
39236c6e
A
784 if (c_swapout_count == 0)
785 vm_swap_consider_defragmenting();
786
787 lck_mtx_lock_spin_always(c_list_lock);
788 }
789
790 assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
791
792 lck_mtx_unlock_always(c_list_lock);
793
794 thread_block((thread_continue_t)vm_swapout_thread);
795
796 /* NOTREACHED */
797}
798
799boolean_t
800vm_swap_create_file()
801{
802 uint64_t size = 0;
803 int namelen = 0;
804 boolean_t swap_file_created = FALSE;
805 boolean_t swap_file_reuse = FALSE;
3e170ce0 806 boolean_t swap_file_pin = FALSE;
39236c6e
A
807 struct swapfile *swf = NULL;
808
39236c6e
A
809 /*
810 * Any swapfile structure ready for re-use?
811 */
812
813 lck_mtx_lock(&vm_swap_data_lock);
814
815 swf = (struct swapfile*) queue_first(&swf_global_queue);
816
817 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
818 if (swf->swp_flags == SWAP_REUSE) {
819 swap_file_reuse = TRUE;
820 break;
821 }
822 swf = (struct swapfile*) queue_next(&swf->swp_queue);
823 }
824
825 lck_mtx_unlock(&vm_swap_data_lock);
826
827 if (swap_file_reuse == FALSE) {
828
fe8ab488
A
829 if (strlen(swapfilename) == 0) {
830 /*
831 * If no swapfile name has been set, we'll
832 * use the default name.
833 *
834 * Also, this function is only called from the swapfile management thread.
835 * So we don't need to worry about a race in checking/setting the name here.
836 */
837
838 strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
839 }
840
841 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
39236c6e
A
842
843 swf = (struct swapfile*) kalloc(sizeof *swf);
844 memset(swf, 0, sizeof(*swf));
845
846 swf->swp_index = vm_num_swap_files + 1;
847 swf->swp_pathlen = namelen;
848 swf->swp_path = (char*)kalloc(swf->swp_pathlen);
849
850 memset(swf->swp_path, 0, namelen);
851
fe8ab488 852 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
39236c6e
A
853 }
854
855 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
856
857 if (swf->swp_vp == NULL) {
858 if (swap_file_reuse == FALSE) {
859 kfree(swf->swp_path, swf->swp_pathlen);
860 kfree(swf, sizeof *swf);
861 }
862 return FALSE;
863 }
fe8ab488
A
864 vm_swapfile_can_be_created = TRUE;
865
39236c6e
A
866 size = MAX_SWAP_FILE_SIZE;
867
868 while (size >= MIN_SWAP_FILE_SIZE) {
869
3e170ce0
A
870 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
871
872 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
39236c6e
A
873
874 int num_bytes_for_bitmap = 0;
875
876 swap_file_created = TRUE;
877
878 swf->swp_size = size;
879 swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
880 swf->swp_nseginuse = 0;
881 swf->swp_free_hint = 0;
882
883 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3) , 1);
884 /*
885 * Allocate a bitmap that describes the
886 * number of segments held by this swapfile.
887 */
888 swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
889 memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);
890
891 swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
892 memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));
893
894 /*
895 * passing a NULL trim_list into vnode_trim_list
896 * will return ENOTSUP if trim isn't supported
897 * and 0 if it is
898 */
fe8ab488
A
899 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
900 swp_trim_supported = TRUE;
39236c6e
A
901
902 lck_mtx_lock(&vm_swap_data_lock);
903
904 swf->swp_flags = SWAP_READY;
905
906 if (swap_file_reuse == FALSE) {
907 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
908 }
909
910 vm_num_swap_files++;
911
912 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
913
3e170ce0
A
914 if (swap_file_pin == TRUE) {
915 swf->swp_flags |= SWAP_PINNED;
916 vm_swappin_avail -= swf->swp_size;
917 }
918
39236c6e
A
919 lck_mtx_unlock(&vm_swap_data_lock);
920
921 thread_wakeup((event_t) &vm_num_swap_files);
39236c6e
A
922 break;
923 } else {
924
925 size = size / 2;
926 }
927 }
928 if (swap_file_created == FALSE) {
929
930 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
931
932 swf->swp_vp = NULL;
933
934 if (swap_file_reuse == FALSE) {
935 kfree(swf->swp_path, swf->swp_pathlen);
936 kfree(swf, sizeof *swf);
937 }
938 }
939 return swap_file_created;
940}
941
942
943kern_return_t
944vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size)
945{
946 struct swapfile *swf = NULL;
947 uint64_t file_offset = 0;
fe8ab488 948 int retval = 0;
39236c6e
A
949
950 if (addr == 0) {
951 return KERN_FAILURE;
952 }
953
954 lck_mtx_lock(&vm_swap_data_lock);
955
956 swf = vm_swapfile_for_handle(f_offset);
957
fe8ab488
A
958 if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
959 retval = 1;
960 goto done;
39236c6e 961 }
fe8ab488
A
962 swf->swp_io_count++;
963
964 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 965
fe8ab488 966 file_offset = (f_offset & SWAP_SLOT_MASK);
39236c6e
A
967 retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ);
968
fe8ab488
A
969 if (retval == 0)
970 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
971 else
972 vm_swap_get_failures++;
973
39236c6e
A
974 /*
975 * Free this slot in the swap structure.
976 */
977 vm_swap_free(f_offset);
978
979 lck_mtx_lock(&vm_swap_data_lock);
980 swf->swp_io_count--;
981
982 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
983
984 swf->swp_flags &= ~SWAP_WANTED;
985 thread_wakeup((event_t) &swf->swp_flags);
986 }
fe8ab488 987done:
39236c6e
A
988 lck_mtx_unlock(&vm_swap_data_lock);
989
990 if (retval == 0)
991 return KERN_SUCCESS;
fe8ab488 992 else
39236c6e 993 return KERN_FAILURE;
39236c6e
A
994}
995
996kern_return_t
997vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
998{
999 unsigned int segidx = 0;
1000 struct swapfile *swf = NULL;
1001 uint64_t file_offset = 0;
1002 uint64_t swapfile_index = 0;
1003 unsigned int byte_for_segidx = 0;
1004 unsigned int offset_within_byte = 0;
1005 boolean_t swf_eligible = FALSE;
1006 boolean_t waiting = FALSE;
fe8ab488 1007 boolean_t retried = FALSE;
39236c6e
A
1008 int error = 0;
1009 clock_sec_t sec;
1010 clock_nsec_t nsec;
1011
1012 if (addr == 0 || f_offset == NULL) {
1013 return KERN_FAILURE;
1014 }
fe8ab488 1015retry:
39236c6e
A
1016 lck_mtx_lock(&vm_swap_data_lock);
1017
1018 swf = (struct swapfile*) queue_first(&swf_global_queue);
1019
1020 while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1021
1022 segidx = swf->swp_free_hint;
1023
1024 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1025
1026 if (swf_eligible) {
1027
1028 while(segidx < swf->swp_nsegs) {
1029
1030 byte_for_segidx = segidx >> 3;
1031 offset_within_byte = segidx % 8;
1032
1033 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1034 segidx++;
1035 continue;
1036 }
1037
1038 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1039
1040 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1041 swf->swp_nseginuse++;
1042 swf->swp_io_count++;
1043 swapfile_index = swf->swp_index;
1044
1045 vm_swapfile_total_segs_used++;
1046
1047 clock_get_system_nanotime(&sec, &nsec);
1048
fe8ab488
A
1049 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1050 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1051
1052 lck_mtx_unlock(&vm_swap_data_lock);
1053
1054 goto done;
1055 }
1056 }
1057 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1058 }
1059 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1060
1061 /*
1062 * we've run out of swap segments, but may not
1063 * be in a position to immediately create a new swap
1064 * file if we've recently failed to create due to a lack
1065 * of free space in the root filesystem... we'll try
1066 * to kick that create off, but in any event we're going
1067 * to take a breather (up to 1 second) so that we're not caught in a tight
1068 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1069 * segments into swap files only to have them immediately put back
1070 * on the c_age queue due to vm_swap_put failing.
1071 *
1072 * if we're doing these puts due to a hibernation flush,
1073 * no need to block... setting hibernate_no_swapspace to TRUE,
1074 * will cause "vm_compressor_compact_and_swap" to immediately abort
1075 */
1076 clock_get_system_nanotime(&sec, &nsec);
1077
fe8ab488
A
1078 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1079 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1080
1081 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1082 waiting = TRUE;
1083 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
1084 } else
1085 hibernate_no_swapspace = TRUE;
1086
1087 lck_mtx_unlock(&vm_swap_data_lock);
1088
fe8ab488 1089 if (waiting == TRUE) {
39236c6e
A
1090 thread_block(THREAD_CONTINUE_NULL);
1091
fe8ab488
A
1092 if (retried == FALSE && hibernate_flushing == TRUE) {
1093 retried = TRUE;
1094 goto retry;
1095 }
1096 }
3e170ce0 1097 vm_swap_put_failures++;
fe8ab488 1098
39236c6e
A
1099 return KERN_FAILURE;
1100
1101done:
1102 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);
1103
1104 lck_mtx_lock(&vm_swap_data_lock);
1105
1106 swf->swp_csegs[segidx] = c_seg;
1107
1108 swf->swp_io_count--;
1109
1110 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1111
1112 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1113
1114 swf->swp_flags &= ~SWAP_WANTED;
1115 thread_wakeup((event_t) &swf->swp_flags);
1116 }
1117
1118 lck_mtx_unlock(&vm_swap_data_lock);
1119
39236c6e
A
1120 if (error) {
1121 vm_swap_free(*f_offset);
1122
3e170ce0
A
1123 vm_swap_put_failures++;
1124
39236c6e
A
1125 return KERN_FAILURE;
1126 }
1127 return KERN_SUCCESS;
1128}
1129
1130
1131
1132static void
1133vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1134{
1135 uint64_t file_offset = 0;
1136 unsigned int segidx = 0;
1137
1138
1139 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1140
1141 unsigned int byte_for_segidx = 0;
1142 unsigned int offset_within_byte = 0;
1143
1144 file_offset = (f_offset & SWAP_SLOT_MASK);
1145 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1146
1147 byte_for_segidx = segidx >> 3;
1148 offset_within_byte = segidx % 8;
1149
1150 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1151
1152 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1153
1154 swf->swp_csegs[segidx] = NULL;
1155
1156 swf->swp_nseginuse--;
1157 vm_swapfile_total_segs_used--;
1158
1159 if (segidx < swf->swp_free_hint) {
1160 swf->swp_free_hint = segidx;
1161 }
1162 }
fe8ab488
A
1163 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1164 thread_wakeup((event_t) &vm_swapfile_gc_needed);
39236c6e 1165 }
39236c6e
A
1166}
1167
1168
1169uint32_t vm_swap_free_now_count = 0;
1170uint32_t vm_swap_free_delayed_count = 0;
1171
1172
1173void
1174vm_swap_free(uint64_t f_offset)
1175{
1176 struct swapfile *swf = NULL;
fe8ab488 1177 struct trim_list *tl = NULL;
39236c6e
A
1178 clock_sec_t sec;
1179 clock_nsec_t nsec;
1180
fe8ab488
A
1181 if (swp_trim_supported == TRUE)
1182 tl = kalloc(sizeof(struct trim_list));
1183
39236c6e
A
1184 lck_mtx_lock(&vm_swap_data_lock);
1185
1186 swf = vm_swapfile_for_handle(f_offset);
1187
1188 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1189
fe8ab488 1190 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
39236c6e
A
1191 /*
1192 * don't delay the free if the underlying disk doesn't support
1193 * trim, or we're in the midst of reclaiming this swap file since
1194 * we don't want to move segments that are technically free
1195 * but not yet handled by the delayed free mechanism
1196 */
1197 vm_swap_free_now(swf, f_offset);
1198
1199 vm_swap_free_now_count++;
fe8ab488 1200 goto done;
39236c6e 1201 }
39236c6e
A
1202 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1203 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1204
1205 tl->tl_next = swf->swp_delayed_trim_list_head;
1206 swf->swp_delayed_trim_list_head = tl;
1207 swf->swp_delayed_trim_count++;
fe8ab488 1208 tl = NULL;
39236c6e 1209
fe8ab488 1210 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
39236c6e
A
1211 clock_get_system_nanotime(&sec, &nsec);
1212
1213 if (sec > dont_trim_until_ts)
fe8ab488 1214 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1215 }
1216 vm_swap_free_delayed_count++;
1217 }
fe8ab488 1218done:
39236c6e 1219 lck_mtx_unlock(&vm_swap_data_lock);
fe8ab488
A
1220
1221 if (tl != NULL)
1222 kfree(tl, sizeof(struct trim_list));
39236c6e
A
1223}
1224
1225
fe8ab488
A
1226static void
1227vm_swap_wait_on_trim_handling_in_progress()
1228{
1229 while (delayed_trim_handling_in_progress == TRUE) {
1230
1231 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1232 lck_mtx_unlock(&vm_swap_data_lock);
1233
1234 thread_block(THREAD_CONTINUE_NULL);
1235
1236 lck_mtx_lock(&vm_swap_data_lock);
1237 }
1238}
1239
1240
39236c6e
A
1241static void
1242vm_swap_handle_delayed_trims(boolean_t force_now)
1243{
1244 struct swapfile *swf = NULL;
1245
1246 /*
fe8ab488
A
1247 * serialize the race between us and vm_swap_reclaim...
1248 * if vm_swap_reclaim wins it will turn off SWAP_READY
1249 * on the victim it has chosen... we can just skip over
1250 * that file since vm_swap_reclaim will first process
1251 * all of the delayed trims associated with it
1252 */
1253 lck_mtx_lock(&vm_swap_data_lock);
1254
1255 delayed_trim_handling_in_progress = TRUE;
1256
1257 lck_mtx_unlock(&vm_swap_data_lock);
1258
1259 /*
1260 * no need to hold the lock to walk the swf list since
1261 * vm_swap_create (the only place where we add to this list)
1262 * is run on the same thread as this function
1263 * and vm_swap_reclaim doesn't remove items from this list
1264 * instead marking them with SWAP_REUSE for future re-use
39236c6e
A
1265 */
1266 swf = (struct swapfile*) queue_first(&swf_global_queue);
1267
1268 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1269
fe8ab488 1270 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
39236c6e 1271
fe8ab488 1272 assert(!(swf->swp_flags & SWAP_RECLAIM));
39236c6e 1273 vm_swap_do_delayed_trim(swf);
fe8ab488 1274 }
39236c6e
A
1275 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1276 }
fe8ab488
A
1277 lck_mtx_lock(&vm_swap_data_lock);
1278
1279 delayed_trim_handling_in_progress = FALSE;
1280 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
39236c6e 1281
fe8ab488
A
1282 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1283 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1284
1285 lck_mtx_unlock(&vm_swap_data_lock);
1286
1287}
39236c6e
A
1288
1289static void
1290vm_swap_do_delayed_trim(struct swapfile *swf)
1291{
1292 struct trim_list *tl, *tl_head;
1293
1294 lck_mtx_lock(&vm_swap_data_lock);
1295
1296 tl_head = swf->swp_delayed_trim_list_head;
1297 swf->swp_delayed_trim_list_head = NULL;
1298 swf->swp_delayed_trim_count = 0;
1299
1300 lck_mtx_unlock(&vm_swap_data_lock);
1301
fe8ab488 1302 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
39236c6e
A
1303
1304 while ((tl = tl_head) != NULL) {
1305 unsigned int segidx = 0;
1306 unsigned int byte_for_segidx = 0;
1307 unsigned int offset_within_byte = 0;
1308
1309 lck_mtx_lock(&vm_swap_data_lock);
1310
1311 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1312
1313 byte_for_segidx = segidx >> 3;
1314 offset_within_byte = segidx % 8;
1315
1316 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1317
1318 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1319
1320 swf->swp_csegs[segidx] = NULL;
1321
1322 swf->swp_nseginuse--;
1323 vm_swapfile_total_segs_used--;
1324
1325 if (segidx < swf->swp_free_hint) {
1326 swf->swp_free_hint = segidx;
1327 }
1328 }
1329 lck_mtx_unlock(&vm_swap_data_lock);
1330
1331 tl_head = tl->tl_next;
1332
1333 kfree(tl, sizeof(struct trim_list));
1334 }
1335}
1336
1337
/*
 * vm_swap_flush
 *
 * Intentionally empty: this routine performs no work.
 */
void
vm_swap_flush()
{
}
1343
1344int vm_swap_reclaim_yielded = 0;
1345
1346void
1347vm_swap_reclaim(void)
1348{
1349 vm_offset_t addr = 0;
1350 unsigned int segidx = 0;
1351 uint64_t f_offset = 0;
1352 struct swapfile *swf = NULL;
1353 struct swapfile *smallest_swf = NULL;
1354 unsigned int min_nsegs = 0;
1355 unsigned int byte_for_segidx = 0;
1356 unsigned int offset_within_byte = 0;
1357 uint32_t c_size = 0;
1358
1359 c_segment_t c_seg = NULL;
1360
3e170ce0 1361 if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
39236c6e
A
1362 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1363 }
1364
1365 lck_mtx_lock(&vm_swap_data_lock);
1366
fe8ab488
A
1367 /*
1368 * if we're running the swapfile list looking for
1369 * candidates with delayed trims, we need to
1370 * wait before making our decision concerning
1371 * the swapfile we want to reclaim
1372 */
1373 vm_swap_wait_on_trim_handling_in_progress();
1374
1375 /*
1376 * from here until we knock down the SWAP_READY bit,
1377 * we need to remain behind the vm_swap_data_lock...
1378 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1379 * will not consider this swapfile for processing
1380 */
39236c6e
A
1381 swf = (struct swapfile*) queue_first(&swf_global_queue);
1382 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1383 smallest_swf = NULL;
1384
1385 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1386
1387 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1388
1389 smallest_swf = swf;
1390 min_nsegs = swf->swp_nseginuse;
1391 }
1392 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1393 }
1394
1395 if (smallest_swf == NULL)
1396 goto done;
1397
1398 swf = smallest_swf;
1399
1400
1401 swf->swp_flags &= ~SWAP_READY;
1402 swf->swp_flags |= SWAP_RECLAIM;
1403
1404 if (swf->swp_delayed_trim_count) {
1405
1406 lck_mtx_unlock(&vm_swap_data_lock);
1407
1408 vm_swap_do_delayed_trim(swf);
1409
1410 lck_mtx_lock(&vm_swap_data_lock);
1411 }
1412 segidx = 0;
1413
1414 while (segidx < swf->swp_nsegs) {
1415
1416ReTry_for_cseg:
39236c6e
A
1417 /*
1418 * Wait for outgoing I/Os.
1419 */
1420 while (swf->swp_io_count) {
1421
1422 swf->swp_flags |= SWAP_WANTED;
1423
1424 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1425 lck_mtx_unlock(&vm_swap_data_lock);
1426
1427 thread_block(THREAD_CONTINUE_NULL);
1428
1429 lck_mtx_lock(&vm_swap_data_lock);
1430 }
fe8ab488
A
1431 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1432 vm_swap_reclaim_yielded++;
1433 break;
1434 }
39236c6e
A
1435
1436 byte_for_segidx = segidx >> 3;
1437 offset_within_byte = segidx % 8;
1438
1439 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
1440
1441 segidx++;
1442 continue;
1443 }
1444
1445 c_seg = swf->swp_csegs[segidx];
3e170ce0 1446 assert(c_seg);
39236c6e
A
1447
1448 lck_mtx_lock_spin_always(&c_seg->c_lock);
1449
39236c6e 1450 if (c_seg->c_busy) {
3e170ce0
A
1451 /*
1452 * a swapped out c_segment in the process of being freed will remain in the
1453 * busy state until after the vm_swap_free is called on it... vm_swap_free
1454 * takes the vm_swap_data_lock, so can't change the swap state until after
1455 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
1456 * which will allow c_seg_free_locked to clear busy and wake up this thread...
1457 * at that point, we re-look up the swap state which will now indicate that
1458 * this c_segment no longer exists.
1459 */
39236c6e
A
1460 c_seg->c_wanted = 1;
1461
1462 assert_wait((event_t) (c_seg), THREAD_UNINT);
1463 lck_mtx_unlock_always(&c_seg->c_lock);
1464
1465 lck_mtx_unlock(&vm_swap_data_lock);
1466
1467 thread_block(THREAD_CONTINUE_NULL);
1468
1469 lck_mtx_lock(&vm_swap_data_lock);
1470
1471 goto ReTry_for_cseg;
1472 }
1473 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1474
1475 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
3e170ce0
A
1476
1477 assert(c_seg == swf->swp_csegs[segidx]);
39236c6e
A
1478 swf->swp_csegs[segidx] = NULL;
1479 swf->swp_nseginuse--;
1480
1481 vm_swapfile_total_segs_used--;
1482
1483 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 1484
3e170ce0
A
1485 assert(C_SEG_IS_ONDISK(c_seg));
1486
1487 C_SEG_BUSY(c_seg);
1488 c_seg->c_busy_swapping = 1;
39236c6e 1489#if !CHECKSUM_THE_SWAP
3e170ce0 1490 c_seg_trim_tail(c_seg);
39236c6e 1491#endif
3e170ce0 1492 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
39236c6e 1493
3e170ce0 1494 assert(c_size <= C_SEG_BUFSIZE && c_size);
39236c6e 1495
3e170ce0 1496 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e 1497
3e170ce0 1498 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {
39236c6e 1499
3e170ce0
A
1500 /*
1501 * reading the data back in failed, so convert c_seg
1502 * to a swapped in c_segment that contains no data
1503 */
1504 c_seg_swapin_requeue(c_seg, FALSE);
1505 /*
1506 * returns with c_busy_swapping cleared
1507 */
39236c6e 1508
3e170ce0
A
1509 vm_swap_get_failures++;
1510 goto swap_io_failed;
1511 }
1512 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
39236c6e 1513
3e170ce0
A
1514 if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
1515 vm_offset_t c_buffer;
39236c6e 1516
3e170ce0
A
1517 /*
1518 * the put failed, so convert c_seg to a fully swapped in c_segment
1519 * with valid data
1520 */
1521 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
fe8ab488 1522
3e170ce0 1523 kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
39236c6e 1524
3e170ce0 1525 memcpy((char *)c_buffer, (char *)addr, c_size);
39236c6e 1526
3e170ce0 1527 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
fe8ab488 1528#if ENCRYPTED_SWAP
3e170ce0 1529 vm_swap_decrypt(c_seg);
fe8ab488 1530#endif /* ENCRYPTED_SWAP */
3e170ce0
A
1531 c_seg_swapin_requeue(c_seg, TRUE);
1532 /*
1533 * returns with c_busy_swapping cleared
1534 */
1535 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
39236c6e 1536
3e170ce0
A
1537 goto swap_io_failed;
1538 }
1539 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
39236c6e 1540
3e170ce0 1541 lck_mtx_lock_spin_always(&c_seg->c_lock);
39236c6e 1542
3e170ce0
A
1543 assert(C_SEG_IS_ONDISK(c_seg));
1544 /*
1545 * The c_seg will now know about the new location on disk.
1546 */
1547 c_seg->c_store.c_swap_handle = f_offset;
1548 c_seg->c_busy_swapping = 0;
39236c6e 1549swap_io_failed:
3e170ce0 1550 C_SEG_WAKEUP_DONE(c_seg);
39236c6e 1551
3e170ce0 1552 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e
A
1553 lck_mtx_lock(&vm_swap_data_lock);
1554 }
1555
1556 if (swf->swp_nseginuse) {
1557
1558 swf->swp_flags &= ~SWAP_RECLAIM;
1559 swf->swp_flags |= SWAP_READY;
1560
1561 goto done;
1562 }
1563 /*
1564 * We don't remove this inactive swf from the queue.
1565 * That way, we can re-use it when needed again and
fe8ab488
A
1566 * preserve the namespace. The delayed_trim processing
1567 * is also dependent on us not removing swfs from the queue.
39236c6e
A
1568 */
1569 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
1570
1571 vm_num_swap_files--;
1572
1573 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
1574
1575 lck_mtx_unlock(&vm_swap_data_lock);
1576
1577 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1578
1579 kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
1580 kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
1581
1582 lck_mtx_lock(&vm_swap_data_lock);
1583
3e170ce0
A
1584 if (swf->swp_flags & SWAP_PINNED) {
1585 vm_swappin_avail += swf->swp_size;
1586 }
1587
39236c6e
A
1588 swf->swp_vp = NULL;
1589 swf->swp_size = 0;
1590 swf->swp_free_hint = 0;
1591 swf->swp_nsegs = 0;
1592 swf->swp_flags = SWAP_REUSE;
1593
39236c6e 1594done:
fe8ab488 1595 thread_wakeup((event_t) &swf->swp_flags);
39236c6e
A
1596 lck_mtx_unlock(&vm_swap_data_lock);
1597
1598 kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
1599}
1600
1601
1602uint64_t
1603vm_swap_get_total_space(void)
1604{
1605 uint64_t total_space = 0;
1606
1607 total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
1608
1609 return total_space;
1610}
1611
1612uint64_t
1613vm_swap_get_used_space(void)
1614{
1615 uint64_t used_space = 0;
1616
1617 used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
1618
1619 return used_space;
1620}
1621
1622uint64_t
1623vm_swap_get_free_space(void)
1624{
1625 return (vm_swap_get_total_space() - vm_swap_get_used_space());
1626}
fe8ab488
A
1627
1628
1629int
1630vm_swap_low_on_space(void)
1631{
1632
1633 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
1634 return (0);
1635
1636 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
1637
1638 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
1639 return (0);
1640
1641 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
1642 return (1);
1643 }
1644 return (0);
1645}