]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_compressor_backing_store.c
xnu-2782.1.97.tar.gz
[apple/xnu.git] / osfmk / vm / vm_compressor_backing_store.c
CommitLineData
39236c6e
A
1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include "vm_compressor_backing_store.h"
30#include <vm/vm_protos.h>
31
32#include <IOKit/IOHibernatePrivate.h>
33
34
35boolean_t compressor_store_stop_compaction = FALSE;
36boolean_t vm_swap_up = FALSE;
fe8ab488
A
37boolean_t vm_swapfile_create_needed = FALSE;
38boolean_t vm_swapfile_gc_needed = FALSE;
39236c6e
A
39
40int swapper_throttle = -1;
41boolean_t swapper_throttle_inited = FALSE;
42uint64_t vm_swapout_thread_id;
43
44uint64_t vm_swap_put_failures = 0;
45uint64_t vm_swap_get_failures = 0;
46int vm_num_swap_files = 0;
47int vm_swapout_thread_processed_segments = 0;
48int vm_swapout_thread_awakened = 0;
fe8ab488
A
49int vm_swapfile_create_thread_awakened = 0;
50int vm_swapfile_create_thread_running = 0;
51int vm_swapfile_gc_thread_awakened = 0;
52int vm_swapfile_gc_thread_running = 0;
39236c6e
A
53
54unsigned int vm_swapfile_total_segs_alloced = 0;
55unsigned int vm_swapfile_total_segs_used = 0;
56
57
58#define SWAP_READY 0x1 /* Swap file is ready to be used */
59#define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
60#define SWAP_WANTED 0x4 /* Swap file has waiters */
61#define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
62
/*
 * Per-swap-file bookkeeping.  Instances are linked on swf_global_queue
 * through swp_queue, which is the first member so that queue entries can
 * be cast directly to struct swapfile pointers.
 */
63struct swapfile{
64 queue_head_t swp_queue; /* list of swap files */
65 char *swp_path; /* saved pathname of swap file */
66 struct vnode *swp_vp; /* backing vnode */
67 uint64_t swp_size; /* size of this swap file */
68 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file; one bit per segment */
69 unsigned int swp_pathlen; /* length of pathname */
70 unsigned int swp_nsegs; /* #segments we can use */
71 unsigned int swp_nseginuse; /* #segments in use */
72 unsigned int swp_index; /* index of this swap file; matched against the handle bits above SWAP_DEVICE_SHIFT */
73 unsigned int swp_flags; /* state of swap file (SWAP_* bits) */
74 unsigned int swp_free_hint; /* segment index where vm_swap_put starts looking for a free slot; lowered by vm_swap_free_now */
75 unsigned int swp_io_count; /* count of outstanding I/Os */
76 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
77
78 struct trim_list *swp_delayed_trim_list_head; /* presumably the head of the pending delayed-trim list -- TODO confirm against the trim code */
79 unsigned int swp_delayed_trim_count; /* compared against VM_SWAPFILE_DELAYED_TRIM_MAX by VM_SWAP_SHOULD_TRIM */
39236c6e
A
80};
81
82queue_head_t swf_global_queue;
fe8ab488 83boolean_t swp_trim_supported = FALSE;
39236c6e
A
84
85#define VM_SWAPFILE_DELAYED_TRIM_MAX 128
86
87extern clock_sec_t dont_trim_until_ts;
88clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
fe8ab488
A
89clock_sec_t vm_swapfile_last_successful_create_ts = 0;
90int vm_swapfile_can_be_created = FALSE;
91boolean_t delayed_trim_handling_in_progress = FALSE;
39236c6e
A
92
93static void vm_swapout_thread_throttle_adjust(void);
94static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
95static void vm_swapout_thread(void);
fe8ab488
A
96static void vm_swapfile_create_thread(void);
97static void vm_swapfile_gc_thread(void);
39236c6e
A
98static void vm_swap_defragment();
99static void vm_swap_handle_delayed_trims(boolean_t);
100static void vm_swap_do_delayed_trim();
fe8ab488
A
101static void vm_swap_wait_on_trim_handling_in_progress(void);
102
39236c6e
A
103
104
39236c6e
A
/*
 * Swap-management policy predicates.  Each one evaluates to 1 or 0.
 *
 * The counters they read (vm_swapfile_total_segs_alloced/used,
 * c_swappedout_sparse_count) are globals; callers decide what locking
 * is appropriate.
 *
 * Macro parameters are parenthesized so that any expression (for
 * example a conditional expression) can be passed safely.
 */

/* more than a quarter of the in-use swap segments are sparse */
#define VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)

/* enough allocated-but-unused segments exist to make reclaiming worthwhile */
#define VM_SWAP_SHOULD_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)

/* the pool of unused segments has dropped to the reclaim minimum */
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)

/*
 * unused segments are below the high-water mark and more than
 * VM_SWAPFILE_DELAYED_CREATE seconds have elapsed since the last
 * failed creation attempt (cur_ts is in seconds)
 */
#define VM_SWAP_SHOULD_CREATE(cur_ts)	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)

/* the swap file has accumulated a full batch of delayed trims */
#define VM_SWAP_SHOULD_TRIM(swf)	(((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
111
112
fe8ab488
A
113#define VM_SWAPFILE_DELAYED_CREATE 15
114
39236c6e
A
115#define VM_SWAP_BUSY() ((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
116
117
118#if CHECKSUM_THE_SWAP
119extern unsigned int hash_string(char *cp, int len);
120#endif
121
fe8ab488 122#if ENCRYPTED_SWAP
39236c6e
A
123extern boolean_t swap_crypt_ctx_initialized;
124extern void swap_crypt_ctx_initialize(void);
125extern const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE];
126extern aes_ctx swap_crypt_ctx;
127extern unsigned long vm_page_encrypt_counter;
128extern unsigned long vm_page_decrypt_counter;
fe8ab488 129#endif /* ENCRYPTED_SWAP */
39236c6e
A
130
131extern void vm_pageout_io_throttle(void);
fe8ab488
A
132extern void vm_pageout_reinit_tuneables(void);
133extern void vm_swap_file_set_tuneables(void);
39236c6e
A
134
135struct swapfile *vm_swapfile_for_handle(uint64_t);
136
137/*
138 * Called with the vm_swap_data_lock held.
139 */
140
141struct swapfile *
142vm_swapfile_for_handle(uint64_t f_offset)
143{
144
145 uint64_t file_offset = 0;
146 unsigned int swapfile_index = 0;
147 struct swapfile* swf = NULL;
148
149 file_offset = (f_offset & SWAP_SLOT_MASK);
150 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
151
152 swf = (struct swapfile*) queue_first(&swf_global_queue);
153
154 while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
155
156 if (swapfile_index == swf->swp_index) {
157 break;
158 }
159
160 swf = (struct swapfile*) queue_next(&swf->swp_queue);
161 }
162
163 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
164 swf = NULL;
165 }
166
167 return swf;
168}
169
170void
fe8ab488 171vm_compressor_swap_init()
39236c6e 172{
39236c6e
A
173 thread_t thread = NULL;
174
39236c6e
A
175 lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
176 lck_grp_init(&vm_swap_data_lock_grp,
177 "vm_swap_data",
178 &vm_swap_data_lock_grp_attr);
179 lck_attr_setdefault(&vm_swap_data_lock_attr);
180 lck_mtx_init_ext(&vm_swap_data_lock,
181 &vm_swap_data_lock_ext,
182 &vm_swap_data_lock_grp,
183 &vm_swap_data_lock_attr);
184
185 queue_init(&swf_global_queue);
186
39236c6e 187
fe8ab488 188 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
39236c6e 189 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
fe8ab488
A
190 panic("vm_swapout_thread: create failed");
191 }
192 thread->options |= TH_OPT_VMPRIV;
193 vm_swapout_thread_id = thread->thread_id;
39236c6e 194
fe8ab488 195 thread_deallocate(thread);
39236c6e 196
fe8ab488
A
197 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
198 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
199 panic("vm_swapfile_create_thread: create failed");
200 }
201 thread->options |= TH_OPT_VMPRIV;
39236c6e 202
fe8ab488 203 thread_deallocate(thread);
39236c6e 204
39236c6e 205
fe8ab488
A
206 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
207 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
208 panic("vm_swapfile_gc_thread: create failed");
209 }
210 thread_deallocate(thread);
39236c6e 211
fe8ab488
A
212 proc_set_task_policy_thread(kernel_task, thread->thread_id,
213 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
214 proc_set_task_policy_thread(kernel_task, thread->thread_id,
215 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
39236c6e 216
fe8ab488
A
217#if ENCRYPTED_SWAP
218 if (swap_crypt_ctx_initialized == FALSE) {
219 swap_crypt_ctx_initialize();
220 }
221#endif /* ENCRYPTED_SWAP */
222
223 memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);
39236c6e 224
fe8ab488 225 vm_swap_up = TRUE;
39236c6e 226
fe8ab488
A
227 printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF");
228}
39236c6e 229
39236c6e 230
fe8ab488
A
231void
232vm_swap_file_set_tuneables()
233{
234 struct vnode *vp;
235 char *pathname;
236 int namelen;
39236c6e 237
fe8ab488
A
238 if (strlen(swapfilename) == 0) {
239 /*
240 * If no swapfile name has been set, we'll
241 * use the default name.
242 *
243 * Also, this function is only called from the vm_pageout_scan thread
244 * via vm_consider_waking_compactor_swapper,
245 * so we don't need to worry about a race in checking/setting the name here.
246 */
247 strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
248 }
249 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
250 pathname = (char*)kalloc(namelen);
251 memset(pathname, 0, namelen);
252 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
39236c6e 253
fe8ab488 254 vm_swapfile_open(pathname, &vp);
39236c6e 255
fe8ab488
A
256 if (vp == NULL)
257 goto done;
39236c6e 258
fe8ab488
A
259 if (vnode_pager_isSSD(vp) == FALSE)
260 vm_pageout_reinit_tuneables();
261 vnode_setswapmount(vp);
262 vm_swapfile_close((uint64_t)pathname, vp);
39236c6e 263done:
fe8ab488 264 kfree(pathname, namelen);
39236c6e
A
265}
266
fe8ab488
A
267
268#if ENCRYPTED_SWAP
39236c6e
A
269void
270vm_swap_encrypt(c_segment_t c_seg)
271{
272 vm_offset_t kernel_vaddr = 0;
273 uint64_t size = 0;
274
275 union {
276 unsigned char aes_iv[AES_BLOCK_SIZE];
277 void *c_seg;
278 } encrypt_iv;
279
280 assert(swap_crypt_ctx_initialized);
281
282 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
283
284 encrypt_iv.c_seg = (void*)c_seg;
285
286 /* encrypt the "initial vector" */
287 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
288 swap_crypt_null_iv,
289 1,
290 &encrypt_iv.aes_iv[0],
291 &swap_crypt_ctx.encrypt);
292
293 kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
294 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
295
296 /*
297 * Encrypt the c_segment.
298 */
299 aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
300 &encrypt_iv.aes_iv[0],
301 (unsigned int)(size / AES_BLOCK_SIZE),
302 (unsigned char *) kernel_vaddr,
303 &swap_crypt_ctx.encrypt);
304
305 vm_page_encrypt_counter += (size/PAGE_SIZE_64);
306}
307
308void
309vm_swap_decrypt(c_segment_t c_seg)
310{
311
312 vm_offset_t kernel_vaddr = 0;
313 uint64_t size = 0;
314
315 union {
316 unsigned char aes_iv[AES_BLOCK_SIZE];
317 void *c_seg;
318 } decrypt_iv;
319
320
321 assert(swap_crypt_ctx_initialized);
322
323 /*
324 * Prepare an "initial vector" for the decryption.
325 * It has to be the same as the "initial vector" we
326 * used to encrypt that page.
327 */
328 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
329
330 decrypt_iv.c_seg = (void*)c_seg;
331
332 /* encrypt the "initial vector" */
333 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
334 swap_crypt_null_iv,
335 1,
336 &decrypt_iv.aes_iv[0],
337 &swap_crypt_ctx.encrypt);
338
339 kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
340 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
341
342 /*
343 * Decrypt the c_segment.
344 */
345 aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
346 &decrypt_iv.aes_iv[0],
347 (unsigned int) (size / AES_BLOCK_SIZE),
348 (unsigned char *) kernel_vaddr,
349 &swap_crypt_ctx.decrypt);
350
351 vm_page_decrypt_counter += (size/PAGE_SIZE_64);
352}
fe8ab488 353#endif /* ENCRYPTED_SWAP */
39236c6e
A
354
355
356void
357vm_swap_consider_defragmenting()
358{
fe8ab488
A
359 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
360 (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
39236c6e 361
fe8ab488 362 if (!vm_swapfile_gc_thread_running) {
39236c6e
A
363 lck_mtx_lock(&vm_swap_data_lock);
364
fe8ab488
A
365 if (!vm_swapfile_gc_thread_running)
366 thread_wakeup((event_t) &vm_swapfile_gc_needed);
39236c6e
A
367
368 lck_mtx_unlock(&vm_swap_data_lock);
369 }
370 }
371}
372
373
374int vm_swap_defragment_yielded = 0;
375int vm_swap_defragment_swapin = 0;
376int vm_swap_defragment_free = 0;
377int vm_swap_defragment_busy = 0;
378
379
380static void
381vm_swap_defragment()
382{
383 c_segment_t c_seg;
384
385 /*
386 * have to grab the master lock w/o holding
387 * any locks in spin mode
388 */
389 PAGE_REPLACEMENT_DISALLOWED(TRUE);
390
391 lck_mtx_lock_spin_always(c_list_lock);
392
393 while (!queue_empty(&c_swappedout_sparse_list_head)) {
394
395 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
396 vm_swap_defragment_yielded++;
397 break;
398 }
399 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
400
401 lck_mtx_lock_spin_always(&c_seg->c_lock);
402
403 assert(c_seg->c_on_swappedout_sparse_q);
404
405 if (c_seg->c_busy) {
406 lck_mtx_unlock_always(c_list_lock);
407
408 PAGE_REPLACEMENT_DISALLOWED(FALSE);
409 /*
410 * c_seg_wait_on_busy consumes c_seg->c_lock
411 */
412 c_seg_wait_on_busy(c_seg);
413
414 PAGE_REPLACEMENT_DISALLOWED(TRUE);
415
416 lck_mtx_lock_spin_always(c_list_lock);
417
418 vm_swap_defragment_busy++;
419 continue;
420 }
421 if (c_seg->c_bytes_used == 0) {
422 /*
423 * c_seg_free_locked consumes the c_list_lock
424 * and c_seg->c_lock
425 */
426 c_seg_free_locked(c_seg);
427
428 vm_swap_defragment_free++;
429 } else {
430 lck_mtx_unlock_always(c_list_lock);
431
432 c_seg_swapin(c_seg, TRUE);
433 lck_mtx_unlock_always(&c_seg->c_lock);
434
435 vm_swap_defragment_swapin++;
436 }
437 PAGE_REPLACEMENT_DISALLOWED(FALSE);
438
439 vm_pageout_io_throttle();
440
441 /*
442 * because write waiters have privilege over readers,
443 * dropping and immediately retaking the master lock will
444 * still allow any thread waiting to acquire the
445 * master lock exclusively an opportunity to take it
446 */
447 PAGE_REPLACEMENT_DISALLOWED(TRUE);
448
449 lck_mtx_lock_spin_always(c_list_lock);
450 }
451 lck_mtx_unlock_always(c_list_lock);
452
453 PAGE_REPLACEMENT_DISALLOWED(FALSE);
454}
455
456
457
458static void
fe8ab488 459vm_swapfile_create_thread(void)
39236c6e 460{
39236c6e
A
461 clock_sec_t sec;
462 clock_nsec_t nsec;
463
fe8ab488
A
464 vm_swapfile_create_thread_awakened++;
465 vm_swapfile_create_thread_running = 1;
39236c6e 466
fe8ab488 467 while (TRUE) {
39236c6e
A
468 /*
469 * walk through the list of swap files
470 * and do the delayed frees/trims for
471 * any swap file whose count of delayed
472 * frees is above the batch limit
473 */
474 vm_swap_handle_delayed_trims(FALSE);
475
fe8ab488 476 lck_mtx_lock(&vm_swap_data_lock);
39236c6e 477
fe8ab488 478 clock_get_system_nanotime(&sec, &nsec);
39236c6e 479
fe8ab488
A
480 if (VM_SWAP_SHOULD_CREATE(sec) == 0)
481 break;
39236c6e 482
fe8ab488 483 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 484
fe8ab488
A
485 if (vm_swap_create_file() == FALSE) {
486 vm_swapfile_last_failed_to_create_ts = sec;
487 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
39236c6e 488
fe8ab488
A
489 } else
490 vm_swapfile_last_successful_create_ts = sec;
491 }
492 vm_swapfile_create_thread_running = 0;
39236c6e 493
fe8ab488 494 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
39236c6e 495
fe8ab488 496 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 497
fe8ab488
A
498 thread_block((thread_continue_t)vm_swapfile_create_thread);
499
500 /* NOTREACHED */
501}
39236c6e 502
39236c6e 503
fe8ab488
A
504static void
505vm_swapfile_gc_thread(void)
506{
507 boolean_t need_defragment;
508 boolean_t need_reclaim;
509
510 vm_swapfile_gc_thread_awakened++;
511 vm_swapfile_gc_thread_running = 1;
512
513 while (TRUE) {
514
515 lck_mtx_lock(&vm_swap_data_lock);
516
517 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
518 break;
519
520 need_defragment = FALSE;
521 need_reclaim = FALSE;
522
523 if (VM_SWAP_SHOULD_DEFRAGMENT())
524 need_defragment = TRUE;
525
526 if (VM_SWAP_SHOULD_RECLAIM()) {
527 need_defragment = TRUE;
528 need_reclaim = TRUE;
529 }
530 if (need_defragment == FALSE && need_reclaim == FALSE)
531 break;
532
39236c6e 533 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 534
fe8ab488
A
535 if (need_defragment == TRUE)
536 vm_swap_defragment();
537 if (need_reclaim == TRUE)
538 vm_swap_reclaim();
539 }
540 vm_swapfile_gc_thread_running = 0;
39236c6e 541
fe8ab488 542 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
39236c6e
A
543
544 lck_mtx_unlock(&vm_swap_data_lock);
545
fe8ab488 546 thread_block((thread_continue_t)vm_swapfile_gc_thread);
39236c6e
A
547
548 /* NOTREACHED */
549}
550
551
552
553int swapper_entered_T0 = 0;
554int swapper_entered_T1 = 0;
555int swapper_entered_T2 = 0;
556
557static void
558vm_swapout_thread_throttle_adjust(void)
559{
560 int swapper_throttle_new;
561
562 if (swapper_throttle_inited == FALSE) {
563 /*
564 * force this thread to be set to the correct
565 * throttling tier
566 */
567 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
568 swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
569 swapper_throttle_inited = TRUE;
570 swapper_entered_T2++;
571 goto done;
572 }
573 swapper_throttle_new = swapper_throttle;
574
575
576 switch(swapper_throttle) {
577
578 case THROTTLE_LEVEL_COMPRESSOR_TIER2:
579
580 if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
581 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
582 swapper_entered_T1++;
583 break;
584 }
585 break;
586
587 case THROTTLE_LEVEL_COMPRESSOR_TIER1:
588
589 if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
590 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
591 swapper_entered_T0++;
592 break;
593 }
594 if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
595 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
596 swapper_entered_T2++;
597 break;
598 }
599 break;
600
601 case THROTTLE_LEVEL_COMPRESSOR_TIER0:
602
603 if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
604 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
605 swapper_entered_T2++;
606 break;
607 }
608 if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
609 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
610 swapper_entered_T1++;
611 break;
612 }
613 break;
614 }
615done:
616 if (swapper_throttle != swapper_throttle_new) {
617 proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
618 TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
619 proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
620 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
621
622 swapper_throttle = swapper_throttle_new;
623 }
624}
625
626
627static void
628vm_swapout_thread(void)
629{
630 uint64_t f_offset = 0;
631 uint32_t size = 0;
632 c_segment_t c_seg = NULL;
633 kern_return_t kr = KERN_SUCCESS;
634 vm_offset_t addr = 0;
635
636 vm_swapout_thread_awakened++;
637
638 lck_mtx_lock_spin_always(c_list_lock);
639
640 while (!queue_empty(&c_swapout_list_head)) {
641
642 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
643
644 lck_mtx_lock_spin_always(&c_seg->c_lock);
645
646 assert(c_seg->c_on_swapout_q);
647
648 if (c_seg->c_busy) {
39236c6e
A
649 lck_mtx_unlock_always(c_list_lock);
650
8a3053a0 651 c_seg_wait_on_busy(c_seg);
39236c6e
A
652
653 lck_mtx_lock_spin_always(c_list_lock);
654
655 continue;
656 }
657 queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
658 c_seg->c_on_swapout_q = 0;
659 c_swapout_count--;
660
39236c6e
A
661 vm_swapout_thread_processed_segments++;
662
663 thread_wakeup((event_t)&compaction_swapper_running);
664
8a3053a0
A
665 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
666
667 if (size == 0) {
668 c_seg_free_locked(c_seg);
669 goto c_seg_was_freed;
670 }
fe8ab488 671 C_SEG_BUSY(c_seg);
8a3053a0
A
672 c_seg->c_busy_swapping = 1;
673
39236c6e
A
674 lck_mtx_unlock_always(c_list_lock);
675
676 addr = (vm_offset_t) c_seg->c_store.c_buffer;
677
39236c6e
A
678 lck_mtx_unlock_always(&c_seg->c_lock);
679
680#if CHECKSUM_THE_SWAP
681 c_seg->cseg_hash = hash_string((char*)addr, (int)size);
682 c_seg->cseg_swap_size = size;
683#endif /* CHECKSUM_THE_SWAP */
684
fe8ab488 685#if ENCRYPTED_SWAP
39236c6e 686 vm_swap_encrypt(c_seg);
fe8ab488 687#endif /* ENCRYPTED_SWAP */
39236c6e
A
688
689 vm_swapout_thread_throttle_adjust();
690
691 kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);
692
693 PAGE_REPLACEMENT_DISALLOWED(TRUE);
694
695 lck_mtx_lock_spin_always(c_list_lock);
696 lck_mtx_lock_spin_always(&c_seg->c_lock);
697
698 if (kr == KERN_SUCCESS) {
699
700 if (C_SEG_ONDISK_IS_SPARSE(c_seg) && hibernate_flushing == FALSE) {
701
702 c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
703 c_seg->c_on_swappedout_sparse_q = 1;
704 c_swappedout_sparse_count++;
705
706 } else {
707 if (hibernate_flushing == TRUE && (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
708 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id))
709 queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
710 else
711 queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
712 c_seg->c_on_swappedout_q = 1;
713 c_swappedout_count++;
714 }
715 c_seg->c_store.c_swap_handle = f_offset;
716 c_seg->c_ondisk = 1;
717
718 VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
719
720 if (c_seg->c_bytes_used)
721 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
722 } else {
fe8ab488 723#if ENCRYPTED_SWAP
39236c6e 724 vm_swap_decrypt(c_seg);
fe8ab488 725#endif /* ENCRYPTED_SWAP */
39236c6e
A
726 c_seg_insert_into_q(&c_age_list_head, c_seg);
727 c_seg->c_on_age_q = 1;
728 c_age_count++;
729
730 vm_swap_put_failures++;
731 }
732 lck_mtx_unlock_always(c_list_lock);
733
39236c6e
A
734 if (c_seg->c_must_free)
735 c_seg_free(c_seg);
fe8ab488
A
736 else {
737 c_seg->c_busy_swapping = 0;
738 C_SEG_WAKEUP_DONE(c_seg);
39236c6e 739 lck_mtx_unlock_always(&c_seg->c_lock);
fe8ab488 740 }
39236c6e
A
741
742 if (kr == KERN_SUCCESS)
743 kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
744
745 PAGE_REPLACEMENT_DISALLOWED(FALSE);
746
fe8ab488 747 if (kr == KERN_SUCCESS) {
39236c6e 748 kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_ALLOCSIZE);
fe8ab488
A
749 OSAddAtomic64(-C_SEG_ALLOCSIZE, &compressor_kvspace_used);
750 }
39236c6e 751 vm_pageout_io_throttle();
8a3053a0 752c_seg_was_freed:
39236c6e
A
753 if (c_swapout_count == 0)
754 vm_swap_consider_defragmenting();
755
756 lck_mtx_lock_spin_always(c_list_lock);
757 }
758
759 assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
760
761 lck_mtx_unlock_always(c_list_lock);
762
763 thread_block((thread_continue_t)vm_swapout_thread);
764
765 /* NOTREACHED */
766}
767
768boolean_t
769vm_swap_create_file()
770{
771 uint64_t size = 0;
772 int namelen = 0;
773 boolean_t swap_file_created = FALSE;
774 boolean_t swap_file_reuse = FALSE;
775 struct swapfile *swf = NULL;
776
39236c6e
A
777 /*
778 * Any swapfile structure ready for re-use?
779 */
780
781 lck_mtx_lock(&vm_swap_data_lock);
782
783 swf = (struct swapfile*) queue_first(&swf_global_queue);
784
785 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
786 if (swf->swp_flags == SWAP_REUSE) {
787 swap_file_reuse = TRUE;
788 break;
789 }
790 swf = (struct swapfile*) queue_next(&swf->swp_queue);
791 }
792
793 lck_mtx_unlock(&vm_swap_data_lock);
794
795 if (swap_file_reuse == FALSE) {
796
fe8ab488
A
797 if (strlen(swapfilename) == 0) {
798 /*
799 * If no swapfile name has been set, we'll
800 * use the default name.
801 *
802 * Also, this function is only called from the swapfile management thread.
803 * So we don't need to worry about a race in checking/setting the name here.
804 */
805
806 strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
807 }
808
809 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
39236c6e
A
810
811 swf = (struct swapfile*) kalloc(sizeof *swf);
812 memset(swf, 0, sizeof(*swf));
813
814 swf->swp_index = vm_num_swap_files + 1;
815 swf->swp_pathlen = namelen;
816 swf->swp_path = (char*)kalloc(swf->swp_pathlen);
817
818 memset(swf->swp_path, 0, namelen);
819
fe8ab488 820 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
39236c6e
A
821 }
822
823 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
824
825 if (swf->swp_vp == NULL) {
826 if (swap_file_reuse == FALSE) {
827 kfree(swf->swp_path, swf->swp_pathlen);
828 kfree(swf, sizeof *swf);
829 }
830 return FALSE;
831 }
fe8ab488
A
832 vm_swapfile_can_be_created = TRUE;
833
39236c6e
A
834 size = MAX_SWAP_FILE_SIZE;
835
836 while (size >= MIN_SWAP_FILE_SIZE) {
837
838 if (vm_swapfile_preallocate(swf->swp_vp, &size) == 0) {
839
840 int num_bytes_for_bitmap = 0;
841
842 swap_file_created = TRUE;
843
844 swf->swp_size = size;
845 swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
846 swf->swp_nseginuse = 0;
847 swf->swp_free_hint = 0;
848
849 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3) , 1);
850 /*
851 * Allocate a bitmap that describes the
852 * number of segments held by this swapfile.
853 */
854 swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
855 memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);
856
857 swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
858 memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));
859
860 /*
861 * passing a NULL trim_list into vnode_trim_list
862 * will return ENOTSUP if trim isn't supported
863 * and 0 if it is
864 */
fe8ab488
A
865 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
866 swp_trim_supported = TRUE;
39236c6e
A
867
868 lck_mtx_lock(&vm_swap_data_lock);
869
870 swf->swp_flags = SWAP_READY;
871
872 if (swap_file_reuse == FALSE) {
873 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
874 }
875
876 vm_num_swap_files++;
877
878 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
879
880 lck_mtx_unlock(&vm_swap_data_lock);
881
882 thread_wakeup((event_t) &vm_num_swap_files);
883
884 break;
885 } else {
886
887 size = size / 2;
888 }
889 }
890 if (swap_file_created == FALSE) {
891
892 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
893
894 swf->swp_vp = NULL;
895
896 if (swap_file_reuse == FALSE) {
897 kfree(swf->swp_path, swf->swp_pathlen);
898 kfree(swf, sizeof *swf);
899 }
900 }
901 return swap_file_created;
902}
903
904
905kern_return_t
906vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size)
907{
908 struct swapfile *swf = NULL;
909 uint64_t file_offset = 0;
fe8ab488 910 int retval = 0;
39236c6e
A
911
912 if (addr == 0) {
913 return KERN_FAILURE;
914 }
915
916 lck_mtx_lock(&vm_swap_data_lock);
917
918 swf = vm_swapfile_for_handle(f_offset);
919
fe8ab488
A
920 if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
921 retval = 1;
922 goto done;
39236c6e 923 }
fe8ab488
A
924 swf->swp_io_count++;
925
926 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 927
fe8ab488 928 file_offset = (f_offset & SWAP_SLOT_MASK);
39236c6e
A
929 retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ);
930
fe8ab488
A
931 if (retval == 0)
932 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
933 else
934 vm_swap_get_failures++;
935
39236c6e
A
936 /*
937 * Free this slot in the swap structure.
938 */
939 vm_swap_free(f_offset);
940
941 lck_mtx_lock(&vm_swap_data_lock);
942 swf->swp_io_count--;
943
944 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
945
946 swf->swp_flags &= ~SWAP_WANTED;
947 thread_wakeup((event_t) &swf->swp_flags);
948 }
fe8ab488 949done:
39236c6e
A
950 lck_mtx_unlock(&vm_swap_data_lock);
951
952 if (retval == 0)
953 return KERN_SUCCESS;
fe8ab488 954 else
39236c6e 955 return KERN_FAILURE;
39236c6e
A
956}
957
958kern_return_t
959vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
960{
961 unsigned int segidx = 0;
962 struct swapfile *swf = NULL;
963 uint64_t file_offset = 0;
964 uint64_t swapfile_index = 0;
965 unsigned int byte_for_segidx = 0;
966 unsigned int offset_within_byte = 0;
967 boolean_t swf_eligible = FALSE;
968 boolean_t waiting = FALSE;
fe8ab488 969 boolean_t retried = FALSE;
39236c6e
A
970 int error = 0;
971 clock_sec_t sec;
972 clock_nsec_t nsec;
973
974 if (addr == 0 || f_offset == NULL) {
975 return KERN_FAILURE;
976 }
fe8ab488 977retry:
39236c6e
A
978 lck_mtx_lock(&vm_swap_data_lock);
979
980 swf = (struct swapfile*) queue_first(&swf_global_queue);
981
982 while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
983
984 segidx = swf->swp_free_hint;
985
986 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
987
988 if (swf_eligible) {
989
990 while(segidx < swf->swp_nsegs) {
991
992 byte_for_segidx = segidx >> 3;
993 offset_within_byte = segidx % 8;
994
995 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
996 segidx++;
997 continue;
998 }
999
1000 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1001
1002 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1003 swf->swp_nseginuse++;
1004 swf->swp_io_count++;
1005 swapfile_index = swf->swp_index;
1006
1007 vm_swapfile_total_segs_used++;
1008
1009 clock_get_system_nanotime(&sec, &nsec);
1010
fe8ab488
A
1011 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1012 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1013
1014 lck_mtx_unlock(&vm_swap_data_lock);
1015
1016 goto done;
1017 }
1018 }
1019 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1020 }
1021 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1022
1023 /*
1024 * we've run out of swap segments, but may not
1025 * be in a position to immediately create a new swap
1026 * file if we've recently failed to create due to a lack
1027 * of free space in the root filesystem... we'll try
1028 * to kick that create off, but in any event we're going
1029 * to take a breather (up to 1 second) so that we're not caught in a tight
1030 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1031 * segments into swap files only to have them immediately put back
1032 * on the c_age queue due to vm_swap_put failing.
1033 *
1034 * if we're doing these puts due to a hibernation flush,
1035 * no need to block... setting hibernate_no_swapspace to TRUE,
1036 * will cause "vm_compressor_compact_and_swap" to immediately abort
1037 */
1038 clock_get_system_nanotime(&sec, &nsec);
1039
fe8ab488
A
1040 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1041 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1042
1043 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1044 waiting = TRUE;
1045 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
1046 } else
1047 hibernate_no_swapspace = TRUE;
1048
1049 lck_mtx_unlock(&vm_swap_data_lock);
1050
fe8ab488 1051 if (waiting == TRUE) {
39236c6e
A
1052 thread_block(THREAD_CONTINUE_NULL);
1053
fe8ab488
A
1054 if (retried == FALSE && hibernate_flushing == TRUE) {
1055 retried = TRUE;
1056 goto retry;
1057 }
1058 }
1059
39236c6e
A
1060 return KERN_FAILURE;
1061
1062done:
1063 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);
1064
1065 lck_mtx_lock(&vm_swap_data_lock);
1066
1067 swf->swp_csegs[segidx] = c_seg;
1068
1069 swf->swp_io_count--;
1070
1071 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1072
1073 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1074
1075 swf->swp_flags &= ~SWAP_WANTED;
1076 thread_wakeup((event_t) &swf->swp_flags);
1077 }
1078
1079 lck_mtx_unlock(&vm_swap_data_lock);
1080
39236c6e
A
1081 if (error) {
1082 vm_swap_free(*f_offset);
1083
1084 return KERN_FAILURE;
1085 }
1086 return KERN_SUCCESS;
1087}
1088
1089
1090
1091static void
1092vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1093{
1094 uint64_t file_offset = 0;
1095 unsigned int segidx = 0;
1096
1097
1098 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1099
1100 unsigned int byte_for_segidx = 0;
1101 unsigned int offset_within_byte = 0;
1102
1103 file_offset = (f_offset & SWAP_SLOT_MASK);
1104 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1105
1106 byte_for_segidx = segidx >> 3;
1107 offset_within_byte = segidx % 8;
1108
1109 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1110
1111 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1112
1113 swf->swp_csegs[segidx] = NULL;
1114
1115 swf->swp_nseginuse--;
1116 vm_swapfile_total_segs_used--;
1117
1118 if (segidx < swf->swp_free_hint) {
1119 swf->swp_free_hint = segidx;
1120 }
1121 }
fe8ab488
A
1122 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1123 thread_wakeup((event_t) &vm_swapfile_gc_needed);
39236c6e 1124 }
39236c6e
A
1125}
1126
1127
/* bumped by vm_swap_free() each time it releases a segment immediately through vm_swap_free_now() */
1128 uint32_t vm_swap_free_now_count = 0;
/* bumped by vm_swap_free() each time it queues a segment on a swapfile's delayed trim list */
1129 uint32_t vm_swap_free_delayed_count = 0;
1130
1131
1132void
1133vm_swap_free(uint64_t f_offset)
1134{
1135 struct swapfile *swf = NULL;
fe8ab488 1136 struct trim_list *tl = NULL;
39236c6e
A
1137 clock_sec_t sec;
1138 clock_nsec_t nsec;
1139
fe8ab488
A
1140 if (swp_trim_supported == TRUE)
1141 tl = kalloc(sizeof(struct trim_list));
1142
39236c6e
A
1143 lck_mtx_lock(&vm_swap_data_lock);
1144
1145 swf = vm_swapfile_for_handle(f_offset);
1146
1147 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1148
fe8ab488 1149 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
39236c6e
A
1150 /*
1151 * don't delay the free if the underlying disk doesn't support
1152 * trim, or we're in the midst of reclaiming this swap file since
1153 * we don't want to move segments that are technically free
1154 * but not yet handled by the delayed free mechanism
1155 */
1156 vm_swap_free_now(swf, f_offset);
1157
1158 vm_swap_free_now_count++;
fe8ab488 1159 goto done;
39236c6e 1160 }
39236c6e
A
1161 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1162 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1163
1164 tl->tl_next = swf->swp_delayed_trim_list_head;
1165 swf->swp_delayed_trim_list_head = tl;
1166 swf->swp_delayed_trim_count++;
fe8ab488 1167 tl = NULL;
39236c6e 1168
fe8ab488 1169 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
39236c6e
A
1170 clock_get_system_nanotime(&sec, &nsec);
1171
1172 if (sec > dont_trim_until_ts)
fe8ab488 1173 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1174 }
1175 vm_swap_free_delayed_count++;
1176 }
fe8ab488 1177done:
39236c6e 1178 lck_mtx_unlock(&vm_swap_data_lock);
fe8ab488
A
1179
1180 if (tl != NULL)
1181 kfree(tl, sizeof(struct trim_list));
39236c6e
A
1182}
1183
1184
fe8ab488
A
1185static void
1186vm_swap_wait_on_trim_handling_in_progress()
1187{
1188 while (delayed_trim_handling_in_progress == TRUE) {
1189
1190 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1191 lck_mtx_unlock(&vm_swap_data_lock);
1192
1193 thread_block(THREAD_CONTINUE_NULL);
1194
1195 lck_mtx_lock(&vm_swap_data_lock);
1196 }
1197}
1198
1199
39236c6e
A
1200static void
1201vm_swap_handle_delayed_trims(boolean_t force_now)
1202{
1203 struct swapfile *swf = NULL;
1204
1205 /*
fe8ab488
A
1206 * serialize the race between us and vm_swap_reclaim...
1207 * if vm_swap_reclaim wins it will turn off SWAP_READY
1208 * on the victim it has chosen... we can just skip over
1209 * that file since vm_swap_reclaim will first process
1210 * all of the delayed trims associated with it
1211 */
1212 lck_mtx_lock(&vm_swap_data_lock);
1213
1214 delayed_trim_handling_in_progress = TRUE;
1215
1216 lck_mtx_unlock(&vm_swap_data_lock);
1217
1218 /*
1219 * no need to hold the lock to walk the swf list since
1220 * vm_swap_create (the only place where we add to this list)
1221 * is run on the same thread as this function
1222 * and vm_swap_reclaim doesn't remove items from this list
1223 * instead marking them with SWAP_REUSE for future re-use
39236c6e
A
1224 */
1225 swf = (struct swapfile*) queue_first(&swf_global_queue);
1226
1227 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1228
fe8ab488 1229 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
39236c6e 1230
fe8ab488 1231 assert(!(swf->swp_flags & SWAP_RECLAIM));
39236c6e 1232 vm_swap_do_delayed_trim(swf);
fe8ab488 1233 }
39236c6e
A
1234 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1235 }
fe8ab488
A
1236 lck_mtx_lock(&vm_swap_data_lock);
1237
1238 delayed_trim_handling_in_progress = FALSE;
1239 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
39236c6e 1240
fe8ab488
A
1241 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1242 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1243
1244 lck_mtx_unlock(&vm_swap_data_lock);
1245
1246}
39236c6e
A
1247
1248static void
1249vm_swap_do_delayed_trim(struct swapfile *swf)
1250{
1251 struct trim_list *tl, *tl_head;
1252
1253 lck_mtx_lock(&vm_swap_data_lock);
1254
1255 tl_head = swf->swp_delayed_trim_list_head;
1256 swf->swp_delayed_trim_list_head = NULL;
1257 swf->swp_delayed_trim_count = 0;
1258
1259 lck_mtx_unlock(&vm_swap_data_lock);
1260
fe8ab488 1261 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
39236c6e
A
1262
1263 while ((tl = tl_head) != NULL) {
1264 unsigned int segidx = 0;
1265 unsigned int byte_for_segidx = 0;
1266 unsigned int offset_within_byte = 0;
1267
1268 lck_mtx_lock(&vm_swap_data_lock);
1269
1270 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1271
1272 byte_for_segidx = segidx >> 3;
1273 offset_within_byte = segidx % 8;
1274
1275 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1276
1277 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1278
1279 swf->swp_csegs[segidx] = NULL;
1280
1281 swf->swp_nseginuse--;
1282 vm_swapfile_total_segs_used--;
1283
1284 if (segidx < swf->swp_free_hint) {
1285 swf->swp_free_hint = segidx;
1286 }
1287 }
1288 lck_mtx_unlock(&vm_swap_data_lock);
1289
1290 tl_head = tl->tl_next;
1291
1292 kfree(tl, sizeof(struct trim_list));
1293 }
1294}
1295
1296
/*
 * vm_swap_flush:
 * Takes no arguments and performs no work; the body is intentionally empty.
 */
void
vm_swap_flush(void)
{
}
1302
/* counts how often vm_swap_reclaim() left its segment loop early (compaction stopped, reclaim aborted or swap busy) */
1303 int vm_swap_reclaim_yielded = 0;
1304
/*
 * vm_swap_reclaim:
 * Choose the SWAP_READY swapfile with the fewest segments in use, flag it
 * SWAP_RECLAIM and try to empty it: every in-use segment is read into a
 * scratch buffer and written out again through vm_swap_put().
 *
 * If the file ends up with no segments in use it is closed, its csegs and
 * bitmap arrays are freed, and it stays on swf_global_queue flagged
 * SWAP_REUSE.  If segments remain (for example because the loop yielded
 * early) the file is switched back to SWAP_READY.
 */
1305 void
1306 vm_swap_reclaim(void)
1307 {
1308 vm_offset_t addr = 0;
1309 unsigned int segidx = 0;
1310 uint64_t f_offset = 0;
1311 struct swapfile *swf = NULL;
1312 struct swapfile *smallest_swf = NULL;
1313 unsigned int min_nsegs = 0;
1314 unsigned int byte_for_segidx = 0;
1315 unsigned int offset_within_byte = 0;
1316 uint32_t c_size = 0;
1317
1318 c_segment_t c_seg = NULL;
1319
/* scratch buffer of C_SEG_BUFSIZE bytes used to stage each segment between the read and the re-put; released at the end of the function */
1320 if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT) != KERN_SUCCESS) {
1321 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1322 }
1323
1324 lck_mtx_lock(&vm_swap_data_lock);
1325
fe8ab488
A
1326 /*
1327 * if we're running the swapfile list looking for
1328 * candidates with delayed trims, we need to
1329 * wait before making our decision concerning
1330 * the swapfile we want to reclaim
1331 */
1332 vm_swap_wait_on_trim_handling_in_progress();
1333
1334 /*
1335 * from here until we knock down the SWAP_READY bit,
1336 * we need to remain behind the vm_swap_data_lock...
1337 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1338 * will not consider this swapfile for processing
1339 */
39236c6e
A
1340 swf = (struct swapfile*) queue_first(&swf_global_queue);
1341 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1342 smallest_swf = NULL;
1343
/* the compare below is <=, so among equally-used files the one found later in the queue is chosen */
1344 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1345
1346 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1347
1348 smallest_swf = swf;
1349 min_nsegs = swf->swp_nseginuse;
1350 }
1351 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1352 }
1353
/*
 * NOTE(review): on this path 'swf' is the value that terminated the walk
 * (the queue-end sentinel), so the thread_wakeup at 'done:' is issued on
 * an address derived from it rather than from a real swapfile -- looks
 * harmless, but confirm.
 */
1354 if (smallest_swf == NULL)
1355 goto done;
1356
1357 swf = smallest_swf;
1358
1359
1360 swf->swp_flags &= ~SWAP_READY;
1361 swf->swp_flags |= SWAP_RECLAIM;
1362
/* process any queued delayed trims first; the lock is dropped because vm_swap_do_delayed_trim() takes vm_swap_data_lock itself */
1363 if (swf->swp_delayed_trim_count) {
1364
1365 lck_mtx_unlock(&vm_swap_data_lock);
1366
1367 vm_swap_do_delayed_trim(swf);
1368
1369 lck_mtx_lock(&vm_swap_data_lock);
1370 }
1371 segidx = 0;
1372
/* each pass starts with vm_swap_data_lock held; segidx only advances when a segment's bitmap bit is found clear */
1373 while (segidx < swf->swp_nsegs) {
1374
1375ReTry_for_cseg:
39236c6e
A
1376 /*
1377 * Wait for outgoing I/Os.
1378 */
1379 while (swf->swp_io_count) {
1380
1381 swf->swp_flags |= SWAP_WANTED;
1382
1383 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1384 lck_mtx_unlock(&vm_swap_data_lock);
1385
1386 thread_block(THREAD_CONTINUE_NULL);
1387
1388 lck_mtx_lock(&vm_swap_data_lock);
1389 }
fe8ab488
A
/* give up on the rest of this file for now; the lock is still held when we break out */
1390 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1391 vm_swap_reclaim_yielded++;
1392 break;
1393 }
39236c6e
A
1394
1395 byte_for_segidx = segidx >> 3;
1396 offset_within_byte = segidx % 8;
1397
/* bit clear: this segment is not in use, move on to the next one */
1398 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
1399
1400 segidx++;
1401 continue;
1402 }
1403
1404 c_seg = swf->swp_csegs[segidx];
1405
1406 lck_mtx_lock_spin_always(&c_seg->c_lock);
1407
1408 assert(c_seg->c_ondisk);
1409
/* someone else has the c_segment busy: drop both locks, sleep on the c_segment, then retry the same segidx */
1410 if (c_seg->c_busy) {
1411
1412 c_seg->c_wanted = 1;
1413
1414 assert_wait((event_t) (c_seg), THREAD_UNINT);
1415 lck_mtx_unlock_always(&c_seg->c_lock);
1416
1417 lck_mtx_unlock(&vm_swap_data_lock);
1418
1419 thread_block(THREAD_CONTINUE_NULL);
1420
1421 lck_mtx_lock(&vm_swap_data_lock);
1422
1423 goto ReTry_for_cseg;
1424 }
/* detach the segment from this swapfile's bookkeeping while still under vm_swap_data_lock */
1425 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1426
/* byte offset of the segment inside this swapfile, used for the read below; vm_swap_put() later overwrites f_offset with the new handle */
1427 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1428
1429 swf->swp_csegs[segidx] = NULL;
1430 swf->swp_nseginuse--;
1431
1432 vm_swapfile_total_segs_used--;
1433
1434 lck_mtx_unlock(&vm_swap_data_lock);
1435
/* a c_segment flagged c_must_free is freed instead of being moved */
1436 if (c_seg->c_must_free) {
fe8ab488 1437 C_SEG_BUSY(c_seg);
39236c6e
A
1438 c_seg_free(c_seg);
1439 } else {
1440
fe8ab488 1441 C_SEG_BUSY(c_seg);
39236c6e
A
1442 c_seg->c_busy_swapping = 1;
1443#if !CHECKSUM_THE_SWAP
1444 c_seg_trim_tail(c_seg);
1445#endif
39236c6e
A
/* number of bytes to move: the populated part of the segment rounded up to a whole page */
1446 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1447
1448 assert(c_size <= C_SEG_BUFSIZE);
1449
39236c6e
A
1450 lck_mtx_unlock_always(&c_seg->c_lock);
1451
1452 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {
1453
1454 /*
1455 * reading the data back in failed, so convert c_seg
1456 * to a swapped in c_segment that contains no data
1457 */
1458 c_seg->c_store.c_buffer = (int32_t *)NULL;
1459 c_seg_swapin_requeue(c_seg);
1460
1461 goto swap_io_failed;
1462 }
1463 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
1464
1465 if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
1466 vm_offset_t c_buffer;
1467
1468 /*
1469 * the put failed, so convert c_seg to a fully swapped in c_segment
1470 * with valid data
1471 */
1472 if (kernel_memory_allocate(kernel_map, &c_buffer, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS)
1473 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
fe8ab488
A
1474 OSAddAtomic64(C_SEG_ALLOCSIZE, &compressor_kvspace_used);
1475
39236c6e
A
/* only the first c_size bytes of the new buffer are populated, then filled from the scratch copy */
1476 kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR);
1477
1478 memcpy((char *)c_buffer, (char *)addr, c_size);
1479
1480 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
fe8ab488 1481#if ENCRYPTED_SWAP
39236c6e 1482 vm_swap_decrypt(c_seg);
fe8ab488 1483#endif /* ENCRYPTED_SWAP */
39236c6e
A
1484 c_seg_swapin_requeue(c_seg);
1485
1486 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
1487
1488 goto swap_io_failed;
1489 }
1490 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
1491
1492 lck_mtx_lock_spin_always(&c_seg->c_lock);
1493
1494 assert(c_seg->c_ondisk);
1495 /*
1496 * The c_seg will now know about the new location on disk.
1497 */
1498 c_seg->c_store.c_swap_handle = f_offset;
/*
 * reached by fall-through on success and by the two failure gotos above.
 * NOTE(review): the failure paths arrive here straight after
 * c_seg_swapin_requeue(); presumably that routine returns with
 * c_seg->c_lock held, matching the success path -- confirm.
 */
1499swap_io_failed:
1500 c_seg->c_busy_swapping = 0;
1501
1502 if (c_seg->c_must_free)
1503 c_seg_free(c_seg);
1504 else {
1505 C_SEG_WAKEUP_DONE(c_seg);
1506
1507 lck_mtx_unlock_always(&c_seg->c_lock);
1508 }
1509 }
1510 lck_mtx_lock(&vm_swap_data_lock);
1511 }
1512
/* segments remain in the file: make it available again instead of tearing it down */
1513 if (swf->swp_nseginuse) {
1514
1515 swf->swp_flags &= ~SWAP_RECLAIM;
1516 swf->swp_flags |= SWAP_READY;
1517
1518 goto done;
1519 }
1520 /*
1521 * We don't remove this inactive swf from the queue.
1522 * That way, we can re-use it when needed again and
fe8ab488
A
1523 * preserve the namespace. The delayed_trim processing
1524 * is also dependent on us not removing swfs from the queue.
39236c6e
A
1525 */
1526 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
1527
1528 vm_num_swap_files--;
1529
1530 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
1531
/* the file close and the kfree calls run without vm_swap_data_lock held */
1532 lck_mtx_unlock(&vm_swap_data_lock);
1533
1534 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1535
1536 kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
1537 kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
1538
1539 lck_mtx_lock(&vm_swap_data_lock);
1540
1541 swf->swp_vp = NULL;
1542 swf->swp_size = 0;
1543 swf->swp_free_hint = 0;
1544 swf->swp_nsegs = 0;
1545 swf->swp_flags = SWAP_REUSE;
1546
39236c6e 1547done:
/* every path reaches this label with vm_swap_data_lock held */
fe8ab488 1548 thread_wakeup((event_t) &swf->swp_flags);
39236c6e
A
1549 lck_mtx_unlock(&vm_swap_data_lock);
1550
1551 kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
1552}
1553
1554
1555uint64_t
1556vm_swap_get_total_space(void)
1557{
1558 uint64_t total_space = 0;
1559
1560 total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
1561
1562 return total_space;
1563}
1564
1565uint64_t
1566vm_swap_get_used_space(void)
1567{
1568 uint64_t used_space = 0;
1569
1570 used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
1571
1572 return used_space;
1573}
1574
/*
 * vm_swap_get_free_space:
 * Bytes of allocated swap not currently in use, i.e. the total space
 * minus the used space.
 */
uint64_t
vm_swap_get_free_space(void)
{
	uint64_t total_bytes = vm_swap_get_total_space();
	uint64_t used_bytes = vm_swap_get_used_space();

	return (total_bytes - used_bytes);
}
fe8ab488
A
1580
1581
1582int
1583vm_swap_low_on_space(void)
1584{
1585
1586 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
1587 return (0);
1588
1589 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
1590
1591 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
1592 return (0);
1593
1594 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
1595 return (1);
1596 }
1597 return (0);
1598}