/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include "vm_compressor_backing_store.h"
#include <vm/vm_protos.h>

#include <IOKit/IOHibernatePrivate.h>

#include <kern/policy_internal.h>

boolean_t	compressor_store_stop_compaction = FALSE;
boolean_t	vm_swapfile_create_needed = FALSE;
boolean_t	vm_swapfile_gc_needed = FALSE;

int		swapper_throttle = -1;
boolean_t	swapper_throttle_inited = FALSE;
uint64_t	vm_swapout_thread_id;

uint64_t	vm_swap_put_failures = 0;
uint64_t	vm_swap_get_failures = 0;
int		vm_num_swap_files = 0;
int		vm_num_pinned_swap_files = 0;
int		vm_swapout_thread_processed_segments = 0;
int		vm_swapout_thread_awakened = 0;
int		vm_swapfile_create_thread_awakened = 0;
int		vm_swapfile_create_thread_running = 0;
int		vm_swapfile_gc_thread_awakened = 0;
int		vm_swapfile_gc_thread_running = 0;

int64_t		vm_swappin_avail = 0;
boolean_t	vm_swappin_enabled = FALSE;
unsigned int	vm_swapfile_total_segs_alloced = 0;
unsigned int	vm_swapfile_total_segs_used = 0;

extern vm_map_t compressor_map;


#define SWAP_READY	0x1	/* Swap file is ready to be used */
#define SWAP_RECLAIM	0x2	/* Swap file is marked to be reclaimed */
#define SWAP_WANTED	0x4	/* Swap file has waiters */
#define SWAP_REUSE	0x8	/* Swap file is on the Q and has a name. Reuse after init-ing.*/
#define SWAP_PINNED	0x10	/* Swap file is pinned (FusionDrive) */


struct swapfile {
	queue_head_t	swp_queue;	/* list of swap files */
	char		*swp_path;	/* saved pathname of swap file */
	struct vnode	*swp_vp;	/* backing vnode */
	uint64_t	swp_size;	/* size of this swap file */
	uint8_t		*swp_bitmap;	/* bitmap showing the alloced/freed slots in the swap file */
	unsigned int	swp_pathlen;	/* length of pathname */
	unsigned int	swp_nsegs;	/* #segments we can use */
	unsigned int	swp_nseginuse;	/* #segments in use */
	unsigned int	swp_index;	/* index of this swap file */
	unsigned int	swp_flags;	/* state of swap file */
	unsigned int	swp_free_hint;	/* offset of 1st free chunk */
	unsigned int	swp_io_count;	/* count of outstanding I/Os */
	c_segment_t	*swp_csegs;	/* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list *swp_delayed_trim_list_head;
	unsigned int	swp_delayed_trim_count;
};
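
/*
 * Each swapfile is carved into fixed-size segments of
 * COMPRESSED_SWAP_CHUNK_SIZE bytes, one per swapped-out c_segment.
 * swp_bitmap tracks allocation with 1 bit per segment (bit set ==
 * slot in use), swp_csegs remembers which c_segment owns each slot
 * so vm_swap_reclaim can relocate live data, and swp_free_hint is
 * the lowest segment index that might be free, letting the allocator
 * in vm_swap_put skip the densely packed front of the file.
 */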

queue_head_t	swf_global_queue;
boolean_t	swp_trim_supported = FALSE;

extern clock_sec_t	dont_trim_until_ts;
clock_sec_t		vm_swapfile_last_failed_to_create_ts = 0;
clock_sec_t		vm_swapfile_last_successful_create_ts = 0;
int			vm_swapfile_can_be_created = FALSE;
boolean_t		delayed_trim_handling_in_progress = FALSE;

boolean_t		hibernate_in_progress_with_pinned_swap = FALSE;

static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapout_thread(void);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment(void);
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim(struct swapfile *);
static void vm_swap_wait_on_trim_handling_in_progress(void);


#define VM_MAX_SWAP_FILE_NUM		100
#define VM_SWAPFILE_DELAYED_TRIM_MAX	128

#define VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
#define VM_SWAP_SHOULD_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)	(vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)	((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)


#define VM_SWAPFILE_DELAYED_CREATE	15

#define VM_SWAP_BUSY()	((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
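
/*
 * These predicates drive the background threads:
 * vm_swapfile_create_thread keys off VM_SWAP_SHOULD_CREATE() and the
 * per-file VM_SWAP_SHOULD_TRIM(), while vm_swapfile_gc_thread keys off
 * VM_SWAP_SHOULD_DEFRAGMENT() and VM_SWAP_SHOULD_RECLAIM().
 * VM_SWAP_BUSY() is true while the swapout thread is actively pushing
 * segments at an elevated I/O tier and is used to defer defrag/reclaim
 * work.  VM_SWAP_SHOULD_CREATE() also rate-limits swapfile creation:
 * after a failed create (e.g. no space in the root filesystem), no new
 * attempt is made for VM_SWAPFILE_DELAYED_CREATE seconds.
 */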


#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif

#if RECORD_THE_COMPRESSED_DATA
boolean_t	c_compressed_record_init_done = FALSE;
int		c_compressed_record_write_error = 0;
struct vnode	*c_compressed_record_vp = NULL;
uint64_t	c_compressed_record_file_offset = 0;
void		c_compressed_record_init(void);
void		c_compressed_record_write(char *, int);
#endif

#if ENCRYPTED_SWAP
extern boolean_t		swap_crypt_ctx_initialized;
extern void			swap_crypt_ctx_initialize(void);
extern const unsigned char	swap_crypt_null_iv[AES_BLOCK_SIZE];
extern aes_ctx			swap_crypt_ctx;
extern unsigned long		vm_page_encrypt_counter;
extern unsigned long		vm_page_decrypt_counter;
#endif /* ENCRYPTED_SWAP */

extern void vm_pageout_io_throttle(void);

static struct swapfile *vm_swapfile_for_handle(uint64_t);

/*
 * Called with the vm_swap_data_lock held.
 */
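/*
 * A 64-bit swap "handle" encodes both the swapfile and the location
 * within it: the file's swp_index lives in the bits above
 * SWAP_DEVICE_SHIFT and the byte offset of the segment lives in the
 * low bits covered by SWAP_SLOT_MASK (see the composition in
 * vm_swap_put and the decomposition below).
 */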

static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	swapfile_index = 0;
	struct swapfile	*swf = NULL;

	file_offset = (f_offset & SWAP_SLOT_MASK);
	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if (swapfile_index == swf->swp_index) {
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
		swf = NULL;
	}
	return swf;
}

void
vm_compressor_swap_init(void)
{
	thread_t	thread = NULL;

	lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
	lck_grp_init(&vm_swap_data_lock_grp,
		     "vm_swap_data",
		     &vm_swap_data_lock_grp_attr);
	lck_attr_setdefault(&vm_swap_data_lock_attr);
	lck_mtx_init_ext(&vm_swap_data_lock,
			 &vm_swap_data_lock_ext,
			 &vm_swap_data_lock_grp,
			 &vm_swap_data_lock_attr);

	queue_init(&swf_global_queue);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapout_thread: create failed");
	}
	vm_swapout_thread_id = thread->thread_id;

	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_create_thread: create failed");
	}
	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_gc_thread: create failed");
	}
	/*
	 * set the gc thread's I/O policy before dropping
	 * our reference to it
	 */
	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
					TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
					TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

	thread_deallocate(thread);

#if ENCRYPTED_SWAP
	if (swap_crypt_ctx_initialized == FALSE) {
		swap_crypt_ctx_initialize();
	}
#endif /* ENCRYPTED_SWAP */

	memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);

	printf("VM Swap Subsystem is ON\n");
}


#if RECORD_THE_COMPRESSED_DATA

void
c_compressed_record_init(void)
{
	if (c_compressed_record_init_done == FALSE) {
		vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
		c_compressed_record_init_done = TRUE;
	}
}

void
c_compressed_record_write(char *buf, int size)
{
	if (c_compressed_record_write_error == 0) {
		c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
		c_compressed_record_file_offset += size;
	}
}
#endif


int	compaction_swapper_inited = 0;

void
vm_compaction_swapper_do_init(void)
{
	struct vnode	*vp;
	char		*pathname;
	int		namelen;

	if (compaction_swapper_inited)
		return;

	if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
		compaction_swapper_inited = 1;
		return;
	}
	lck_mtx_lock(&vm_swap_data_lock);

	if (!compaction_swapper_inited) {

		if (strlen(swapfilename) == 0) {
			/*
			 * If no swapfile name has been set, we'll
			 * use the default name.
			 *
			 * Also, this function is only called from the vm_pageout_scan thread
			 * via vm_consider_waking_compactor_swapper,
			 * so we don't need to worry about a race in checking/setting the name here.
			 */
			strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
		}
		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
		pathname = (char*)kalloc(namelen);
		memset(pathname, 0, namelen);
		snprintf(pathname, namelen, "%s%d", swapfilename, 0);

		vm_swapfile_open(pathname, &vp);

		if (vp) {

			if (vnode_pager_isSSD(vp) == FALSE) {
				vm_compressor_minorcompact_threshold_divisor = 18;
				vm_compressor_majorcompact_threshold_divisor = 22;
				vm_compressor_unthrottle_threshold_divisor = 32;
			}
			vnode_setswapmount(vp);
			vm_swappin_avail = vnode_getswappin_avail(vp);

			if (vm_swappin_avail)
				vm_swappin_enabled = TRUE;
			vm_swapfile_close((uint64_t)pathname, vp);
		}
		kfree(pathname, namelen);

		compaction_swapper_inited = 1;
	}
	lck_mtx_unlock(&vm_swap_data_lock);
}


#if ENCRYPTED_SWAP
void
vm_swap_encrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} encrypt_iv;

	assert(swap_crypt_ctx_initialized);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));

	encrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&encrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Encrypt the c_segment.
	 */
	aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
			&encrypt_iv.aes_iv[0],
			(unsigned int)(size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.encrypt);

	vm_page_encrypt_counter += (size / PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
}

void
vm_swap_decrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} decrypt_iv;

	assert(swap_crypt_ctx_initialized);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	/*
	 * Prepare an "initial vector" for the decryption.
	 * It has to be the same as the "initial vector" we
	 * used to encrypt that c_segment.
	 */
	bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));

	decrypt_iv.c_seg = (void*)c_seg;

	/*
	 * encrypt the "initial vector"... this deliberately uses the
	 * encrypt context so that we regenerate the exact IV that
	 * vm_swap_encrypt derived from this c_seg pointer
	 */
	aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&decrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Decrypt the c_segment.
	 */
	aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
			&decrypt_iv.aes_iv[0],
			(unsigned int) (size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.decrypt);

	vm_page_decrypt_counter += (size / PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
}
#endif /* ENCRYPTED_SWAP */


void
vm_swap_consider_defragmenting(void)
{
	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
	    (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {

		if (!vm_swapfile_gc_thread_running) {
			lck_mtx_lock(&vm_swap_data_lock);

			if (!vm_swapfile_gc_thread_running)
				thread_wakeup((event_t) &vm_swapfile_gc_needed);

			lck_mtx_unlock(&vm_swap_data_lock);
		}
	}
}


int	vm_swap_defragment_yielded = 0;
int	vm_swap_defragment_swapin = 0;
int	vm_swap_defragment_free = 0;
int	vm_swap_defragment_busy = 0;

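/*
 * Relocate sparsely populated swapped-out c_segments: swapping a
 * sparse segment back in frees its on-disk slot (vm_swap_get calls
 * vm_swap_free), and its remaining data can then be compacted and
 * re-swapped in packed form, reducing swapfile fragmentation.
 * Segments with no bytes in use are simply freed.
 */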
static void
vm_swap_defragment(void)
{
	c_segment_t	c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {

		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
				lck_mtx_unlock_always(&c_seg->c_lock);

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}


static void
vm_swapfile_create_thread(void)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	current_thread()->options |= TH_OPT_VMPRIV;

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE)
			break;

		clock_get_system_nanotime(&sec, &nsec);

		if (VM_SWAP_SHOULD_CREATE(sec) == 0)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			vm_swapfile_last_failed_to_create_ts = sec;
			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);

		} else
			vm_swapfile_last_successful_create_ts = sec;
	}
	vm_swapfile_create_thread_running = 0;

	if (hibernate_in_progress_with_pinned_swap == TRUE)
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}


#if HIBERNATION

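/*
 * Called around hibernation: start == TRUE quiesces the swapfile
 * create/gc threads, fails if any existing swapfile is unpinned
 * (vm_num_swap_files > vm_num_pinned_swap_files), and then
 * pre-creates pinned swapfiles while pin space remains;
 * start == FALSE simply clears the in-progress flag.
 */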
kern_return_t
hibernate_pin_swap(boolean_t start)
{
	vm_compaction_swapper_do_init();

	if (start == FALSE) {

		lck_mtx_lock(&vm_swap_data_lock);
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		return (KERN_SUCCESS);
	}
	if (vm_swappin_enabled == FALSE)
		return (KERN_SUCCESS);

	lck_mtx_lock(&vm_swap_data_lock);

	hibernate_in_progress_with_pinned_swap = TRUE;

	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {

		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);

		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	if (vm_num_swap_files > vm_num_pinned_swap_files) {
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
		       vm_num_swap_files, vm_num_pinned_swap_files);
		return (KERN_FAILURE);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
		if (vm_swap_create_file() == FALSE)
			break;
	}
	return (KERN_SUCCESS);
}
#endif

static void
vm_swapfile_gc_thread(void)
{
	boolean_t	need_defragment;
	boolean_t	need_reclaim;

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE)
			break;

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
			break;

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT())
			need_defragment = TRUE;

		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE)
			vm_swap_defragment();
		if (need_reclaim == TRUE)
			vm_swap_reclaim();
	}
	vm_swapfile_gc_thread_running = 0;

	if (hibernate_in_progress_with_pinned_swap == TRUE)
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}


int	swapper_entered_T0 = 0;
int	swapper_entered_T1 = 0;
int	swapper_entered_T2 = 0;

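/*
 * Move the swapout thread between the compressor I/O throttle tiers
 * based on pressure: TIER2 (most throttled) when nothing is urgent,
 * TIER1 when the swapper needs to be unthrottled or a hibernation
 * flush is in progress, and TIER0 when vm_pageout_scan itself is
 * being held back by the compressor.
 */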
static void
vm_swapout_thread_throttle_adjust(void)
{
	int swapper_throttle_new;

	if (swapper_throttle_inited == FALSE) {
		/*
		 * force this thread to be set to the correct
		 * throttling tier... making the cached value
		 * differ from swapper_throttle_new guarantees
		 * the policy update at "done" is applied
		 */
		swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
		swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
		swapper_throttle_inited = TRUE;
		swapper_entered_T2++;
		goto done;
	}
	swapper_throttle_new = swapper_throttle;

	switch (swapper_throttle) {

	case THROTTLE_LEVEL_COMPRESSOR_TIER2:

		if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER1:

		if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
			swapper_entered_T0++;
			break;
		}
		if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER0:

		if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;
	}
done:
	if (swapper_throttle != swapper_throttle_new) {
		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
						TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
						TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

		swapper_throttle = swapper_throttle_new;
	}
}


int	vm_swapout_found_empty = 0;

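/*
 * Consumer of the c_swapout_list: for each c_segment queued for
 * swapout, encrypt it (if ENCRYPTED_SWAP), write it to a swapfile
 * via vm_swap_put, and on success depopulate its backing pages and
 * move it to a swapped-out queue; on failure the segment is aged
 * back onto the C_ON_AGE_Q.
 */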
static void
vm_swapout_thread(void)
{
	uint64_t	f_offset = 0;
	uint32_t	size = 0;
	c_segment_t	c_seg = NULL;
	kern_return_t	kr = KERN_SUCCESS;
	vm_offset_t	addr = 0;

	current_thread()->options |= TH_OPT_VMPRIV;

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swapout_list_head)) {

		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			assert(c_seg->c_bytes_used == 0);

			if (!c_seg->c_on_minorcompact_q)
				c_seg_need_delayed_compaction(c_seg, TRUE);

			c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
			lck_mtx_unlock_always(&c_seg->c_lock);
			lck_mtx_unlock_always(c_list_lock);

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		lck_mtx_unlock_always(c_list_lock);

		addr = (vm_offset_t) c_seg->c_store.c_buffer;

		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char*)addr, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		vm_swapout_thread_throttle_adjust();

		kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		if (kr == KERN_SUCCESS) {
			kernel_memory_depopulate(compressor_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
		}
#if ENCRYPTED_SWAP
		else {
			vm_swap_decrypt(c_seg);
		}
#endif /* ENCRYPTED_SWAP */
		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (kr == KERN_SUCCESS) {
			int		new_state = C_ON_SWAPPEDOUT_Q;
			boolean_t	insert_head = FALSE;

			if (hibernate_flushing == TRUE) {
				if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
				    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
					insert_head = TRUE;
			} else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
				new_state = C_ON_SWAPPEDOUTSPARSE_Q;

			c_seg_switch_state(c_seg, new_state, insert_head);

			c_seg->c_store.c_swap_handle = f_offset;

			VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);

			if (c_seg->c_bytes_used)
				OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
		} else {
			if (c_seg->c_overage_swap == TRUE) {
				c_seg->c_overage_swap = FALSE;
				c_overage_swapped_count--;
			}
			c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);

			if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
				c_seg_need_delayed_compaction(c_seg, TRUE);
		}
		assert(c_seg->c_busy_swapping);
		assert(c_seg->c_busy);

		c_seg->c_busy_swapping = 0;
		lck_mtx_unlock_always(c_list_lock);

		C_SEG_WAKEUP_DONE(c_seg);
		lck_mtx_unlock_always(&c_seg->c_lock);

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();
c_seg_is_empty:
		if (c_swapout_count == 0)
			vm_swap_consider_defragmenting();

		lck_mtx_lock_spin_always(c_list_lock);
	}

	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}

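/*
 * Create (or re-initialize, if a SWAP_REUSE entry exists) a swapfile.
 * The preallocation starts at MAX_SWAP_FILE_SIZE and halves on each
 * failure until MIN_SWAP_FILE_SIZE, so a nearly full filesystem can
 * still yield a usable, smaller swapfile.
 */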
boolean_t
vm_swap_create_file(void)
{
	uint64_t	size = 0;
	int		namelen = 0;
	boolean_t	swap_file_created = FALSE;
	boolean_t	swap_file_reuse = FALSE;
	boolean_t	swap_file_pin = FALSE;
	struct swapfile *swf = NULL;

	/*
	 * make sure we've got all the info we need
	 * to potentially pin a swap file... we could
	 * be swapping out due to hibernation w/o ever
	 * having run vm_pageout_scan, which is normally
	 * the trigger to do the init
	 */
	vm_compaction_swapper_do_init();

	/*
	 * Any swapfile structure ready for re-use?
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {

		if (strlen(swapfilename) == 0) {
			/*
			 * If no swapfile name has been set, we'll
			 * use the default name.
			 *
			 * Also, this function is only called from the swapfile management thread.
			 * So we don't need to worry about a race in checking/setting the name here.
			 */
			strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
		}

		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = (struct swapfile*) kalloc(sizeof *swf);
		memset(swf, 0, sizeof(*swf));

		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = (char*)kalloc(swf->swp_pathlen);

		memset(swf->swp_path, 0, namelen);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}

	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = TRUE;

	size = MAX_SWAP_FILE_SIZE;

	while (size >= MIN_SWAP_FILE_SIZE) {

		swap_file_pin = VM_SWAP_SHOULD_PIN(size);

		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {

			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
			memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);

			swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
			memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
				swp_trim_supported = TRUE;

			lck_mtx_lock(&vm_swap_data_lock);

			swf->swp_flags = SWAP_READY;

			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
			}

			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;

			if (swap_file_pin == TRUE) {
				vm_num_pinned_swap_files++;
				swf->swp_flags |= SWAP_PINNED;
				vm_swappin_avail -= swf->swp_size;
			}

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_wakeup((event_t) &vm_num_swap_files);
			break;
		} else {

			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {

		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
	}
	return swap_file_created;
}


kern_return_t
vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
{
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	int		retval = 0;

	assert(c_seg->c_store.c_buffer);

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
		retval = 1;
		goto done;
	}
	swf->swp_io_count++;

	lck_mtx_unlock(&vm_swap_data_lock);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	file_offset = (f_offset & SWAP_SLOT_MASK);
	retval = vm_swapfile_io(swf->swp_vp, file_offset, c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
	if (retval == 0)
		VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
	else
		vm_swap_get_failures++;

	/*
	 * Free this slot in the swap structure.
	 */
	vm_swap_free(f_offset);

	lck_mtx_lock(&vm_swap_data_lock);
	swf->swp_io_count--;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (retval == 0)
		return KERN_SUCCESS;
	else
		return KERN_FAILURE;
}

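/*
 * Write one compressed segment to swap.  Scans the swapfile list for
 * a READY file with a free segment (starting at swp_free_hint), claims
 * the slot under the vm_swap_data_lock, then performs the I/O with the
 * lock dropped.  On success *f_offset receives the handle
 * (swp_index << SWAP_DEVICE_SHIFT) | byte_offset consumed by
 * vm_swap_get and vm_swap_free.
 */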
kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
{
	unsigned int	segidx = 0;
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	uint64_t	swapfile_index = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	boolean_t	swf_eligible = FALSE;
	boolean_t	waiting = FALSE;
	boolean_t	retried = FALSE;
	int		error = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (addr == 0 || f_offset == NULL) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		segidx = swf->swp_free_hint;

		swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {

			while (segidx < swf->swp_nsegs) {

				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					segidx++;
					continue;
				}
				(swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);

				file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
				swf->swp_nseginuse++;
				swf->swp_io_count++;
				swapfile_index = swf->swp_index;

				vm_swapfile_total_segs_used++;

				clock_get_system_nanotime(&sec, &nsec);

				if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
					thread_wakeup((event_t) &vm_swapfile_create_needed);

				lck_mtx_unlock(&vm_swap_data_lock);

				goto done;
			}
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE,
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	clock_get_system_nanotime(&sec, &nsec);

	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
		thread_wakeup((event_t) &vm_swapfile_create_needed);

	if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
		waiting = TRUE;
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
	} else
		hibernate_no_swapspace = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}
	vm_swap_put_failures++;

	return KERN_FAILURE;

done:
	error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_csegs[segidx] = c_seg;

	swf->swp_io_count--;

	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (error) {
		vm_swap_free(*f_offset);

		vm_swap_put_failures++;

		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}


static void
vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	segidx = 0;

	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {

		unsigned int byte_for_segidx = 0;
		unsigned int offset_within_byte = 0;

		file_offset = (f_offset & SWAP_SLOT_MASK);
		segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
			thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}
}


uint32_t vm_swap_free_now_count = 0;
uint32_t vm_swap_free_delayed_count = 0;


void
vm_swap_free(uint64_t f_offset)
{
	struct swapfile *swf = NULL;
	struct trim_list *tl = NULL;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (swp_trim_supported == TRUE)
		tl = kalloc(sizeof(struct trim_list));

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {

		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism
			 */
			vm_swap_free_now(swf, f_offset);

			vm_swap_free_now_count++;
			goto done;
		}
		tl->tl_offset = f_offset & SWAP_SLOT_MASK;
		tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;

		tl->tl_next = swf->swp_delayed_trim_list_head;
		swf->swp_delayed_trim_list_head = tl;
		swf->swp_delayed_trim_count++;
		tl = NULL;

		if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
			clock_get_system_nanotime(&sec, &nsec);

			if (sec > dont_trim_until_ts)
				thread_wakeup((event_t) &vm_swapfile_create_needed);
		}
		vm_swap_free_delayed_count++;
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (tl != NULL)
		kfree(tl, sizeof(struct trim_list));
}


static void
vm_swap_wait_on_trim_handling_in_progress(void)
{
	while (delayed_trim_handling_in_progress == TRUE) {

		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
}


static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
	struct swapfile *swf = NULL;

	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create_file (the only place where we add to this list)
	 * is run on the same thread as this function
	 * and vm_swap_reclaim doesn't remove items from this list
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {

			assert(!(swf->swp_flags & SWAP_RECLAIM));
			vm_swap_do_delayed_trim(swf);
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = FALSE;
	thread_wakeup((event_t) &delayed_trim_handling_in_progress);

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
		thread_wakeup((event_t) &vm_swapfile_gc_needed);

	lck_mtx_unlock(&vm_swap_data_lock);
}

static void
vm_swap_do_delayed_trim(struct swapfile *swf)
{
	struct trim_list *tl, *tl_head;

	lck_mtx_lock(&vm_swap_data_lock);

	tl_head = swf->swp_delayed_trim_list_head;
	swf->swp_delayed_trim_list_head = NULL;
	swf->swp_delayed_trim_count = 0;

	lck_mtx_unlock(&vm_swap_data_lock);

	vnode_trim_list(swf->swp_vp, tl_head, TRUE);

	while ((tl = tl_head) != NULL) {
		unsigned int	segidx = 0;
		unsigned int	byte_for_segidx = 0;
		unsigned int	offset_within_byte = 0;

		lck_mtx_lock(&vm_swap_data_lock);

		segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		lck_mtx_unlock(&vm_swap_data_lock);

		tl_head = tl->tl_next;

		kfree(tl, sizeof(struct trim_list));
	}
}


void
vm_swap_flush(void)
{
	return;
}

int	vm_swap_reclaim_yielded = 0;

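/*
 * Retire the swapfile with the fewest segments in use: take it out of
 * SWAP_READY, flush its delayed trims, then for each live segment read
 * the data back and re-issue it to another file via vm_swap_put
 * (falling back to a fully swapped-in c_segment if the put fails).
 * The emptied swapfile keeps its queue entry and name, marked
 * SWAP_REUSE for the next vm_swap_create_file call.
 */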
void
vm_swap_reclaim(void)
{
	vm_offset_t	addr = 0;
	unsigned int	segidx = 0;
	uint64_t	f_offset = 0;
	struct swapfile *swf = NULL;
	struct swapfile *smallest_swf = NULL;
	unsigned int	min_nsegs = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	uint32_t	c_size = 0;

	c_segment_t	c_seg = NULL;

	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
		panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
	}

	lck_mtx_lock(&vm_swap_data_lock);

	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
	smallest_swf = NULL;

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {

			smallest_swf = swf;
			min_nsegs = swf->swp_nseginuse;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (smallest_swf == NULL)
		goto done;

	swf = smallest_swf;

	swf->swp_flags &= ~SWAP_READY;
	swf->swp_flags |= SWAP_RECLAIM;

	if (swf->swp_delayed_trim_count) {

		lck_mtx_unlock(&vm_swap_data_lock);

		vm_swap_do_delayed_trim(swf);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	segidx = 0;

	while (segidx < swf->swp_nsegs) {

ReTry_for_cseg:
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {

			swf->swp_flags |= SWAP_WANTED;

			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);
		}
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
			vm_swap_reclaim_yielded++;
			break;
		}

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {

			segidx++;
			continue;
		}

		c_seg = swf->swp_csegs[segidx];
		assert(c_seg);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (c_seg->c_busy) {
			/*
			 * a swapped out c_segment in the process of being freed will remain in the
			 * busy state until after the vm_swap_free is called on it... vm_swap_free
			 * takes the vm_swap_data_lock, so can't change the swap state until after
			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
			 * at that point, we re-look up the swap state which will now indicate that
			 * this c_segment no longer exists.
			 */
			c_seg->c_wanted = 1;

			assert_wait((event_t) (c_seg), THREAD_UNINT);
			lck_mtx_unlock_always(&c_seg->c_lock);

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);

			goto ReTry_for_cseg;
		}
		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

		f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;

		assert(c_seg == swf->swp_csegs[segidx]);
		swf->swp_csegs[segidx] = NULL;
		swf->swp_nseginuse--;

		vm_swapfile_total_segs_used--;

		lck_mtx_unlock(&vm_swap_data_lock);

		assert(C_SEG_IS_ONDISK(c_seg));

		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
		c_seg_trim_tail(c_seg);
#endif
		c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		assert(c_size <= C_SEG_BUFSIZE && c_size);

		lck_mtx_unlock_always(&c_seg->c_lock);

		if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {

			/*
			 * reading the data back in failed, so convert c_seg
			 * to a swapped in c_segment that contains no data
			 */
			c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */

			vm_swap_get_failures++;
			goto swap_io_failed;
		}
		VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);

		if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
			vm_offset_t	c_buffer;

			/*
			 * the put failed, so convert c_seg to a fully swapped in c_segment
			 * with valid data
			 */
			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);

			kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);

			memcpy((char *)c_buffer, (char *)addr, c_size);

			c_seg->c_store.c_buffer = (int32_t *)c_buffer;
#if ENCRYPTED_SWAP
			vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
			c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */
			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

			goto swap_io_failed;
		}
		VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(C_SEG_IS_ONDISK(c_seg));
		/*
		 * The c_seg will now know about the new location on disk.
		 */
		c_seg->c_store.c_swap_handle = f_offset;

		assert(c_seg->c_busy_swapping);
		c_seg->c_busy_swapping = 0;
swap_io_failed:
		assert(c_seg->c_busy);
		C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock(&vm_swap_data_lock);
	}

	if (swf->swp_nseginuse) {

		swf->swp_flags &= ~SWAP_RECLAIM;
		swf->swp_flags |= SWAP_READY;

		goto done;
	}
	/*
	 * We don't remove this inactive swf from the queue.
	 * That way, we can re-use it when needed again and
	 * preserve the namespace. The delayed_trim processing
	 * is also dependent on us not removing swfs from the queue.
	 */
	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

	vm_num_swap_files--;

	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

	lck_mtx_unlock(&vm_swap_data_lock);

	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

	kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
	kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));

	lck_mtx_lock(&vm_swap_data_lock);

	if (swf->swp_flags & SWAP_PINNED) {
		vm_num_pinned_swap_files--;
		vm_swappin_avail += swf->swp_size;
	}

	swf->swp_vp = NULL;
	swf->swp_size = 0;
	swf->swp_free_hint = 0;
	swf->swp_nsegs = 0;
	swf->swp_flags = SWAP_REUSE;

done:
	thread_wakeup((event_t) &swf->swp_flags);
	lck_mtx_unlock(&vm_swap_data_lock);

	kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
}


uint64_t
vm_swap_get_total_space(void)
{
	uint64_t total_space = 0;

	total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;

	return total_space;
}

uint64_t
vm_swap_get_used_space(void)
{
	uint64_t used_space = 0;

	used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;

	return used_space;
}

uint64_t
vm_swap_get_free_space(void)
{
	return (vm_swap_get_total_space() - vm_swap_get_used_space());
}


int
vm_swap_low_on_space(void)
{
	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
		return (0);

	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {

		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
			return (0);

		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
			return (1);
	}
	return (0);
}

boolean_t
vm_swap_files_pinned(void)
{
	boolean_t result;

	if (vm_swappin_enabled == FALSE)
		return (TRUE);

	result = (vm_num_pinned_swap_files == vm_num_swap_files);

	return (result);
}