1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32
33 #include <IOKit/IOHibernatePrivate.h>
34
35 #include <kern/policy_internal.h>
36
37 LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38 LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39
40 #if defined(XNU_TARGET_OS_OSX)
41 /*
42 * launchd explicitly turns ON swap later during boot on macOS devices.
43 */
44 boolean_t compressor_store_stop_compaction = TRUE;
45 #else
46 boolean_t compressor_store_stop_compaction = FALSE;
47 #endif
48
49 boolean_t vm_swapfile_create_needed = FALSE;
50 boolean_t vm_swapfile_gc_needed = FALSE;
51
52 int vm_swapper_throttle = -1;
53 uint64_t vm_swapout_thread_id;
54
55 uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
56 uint64_t vm_swap_get_failures = 0; /* Fatal */
57 uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
58 int vm_num_swap_files_config = 0;
59 int vm_num_swap_files = 0;
60 int vm_num_pinned_swap_files = 0;
61 int vm_swapout_thread_processed_segments = 0;
62 int vm_swapout_thread_awakened = 0;
63 bool vm_swapout_thread_running = FALSE;
64 int vm_swapfile_create_thread_awakened = 0;
65 int vm_swapfile_create_thread_running = 0;
66 int vm_swapfile_gc_thread_awakened = 0;
67 int vm_swapfile_gc_thread_running = 0;
68
69 int64_t vm_swappin_avail = 0;
70 boolean_t vm_swappin_enabled = FALSE;
71 unsigned int vm_swapfile_total_segs_alloced = 0;
72 unsigned int vm_swapfile_total_segs_used = 0;
73
74 char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
75
76 extern vm_map_t compressor_map;
77
78
79 #define SWAP_READY 0x1 /* Swap file is ready to be used */
80 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
81 #define SWAP_WANTED 0x4 /* Swap file has waiters */
82 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing. */
83 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
84
85
86 struct swapfile {
87 queue_head_t swp_queue; /* list of swap files */
88 char *swp_path; /* saved pathname of swap file */
89 struct vnode *swp_vp; /* backing vnode */
90 uint64_t swp_size; /* size of this swap file */
91 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
92 unsigned int swp_pathlen; /* length of pathname */
93 unsigned int swp_nsegs; /* #segments we can use */
94 unsigned int swp_nseginuse; /* #segments in use */
95 unsigned int swp_index; /* index of this swap file */
96 unsigned int swp_flags; /* state of swap file */
97 unsigned int swp_free_hint; /* offset of 1st free chunk */
98 unsigned int swp_io_count; /* count of outstanding I/Os */
99 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
100
101 struct trim_list *swp_delayed_trim_list_head;
102 unsigned int swp_delayed_trim_count;
103 };
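/*
 * Illustrative note (not part of the original source): each swapfile is
 * carved into fixed-size segments of COMPRESSED_SWAP_CHUNK_SIZE bytes.
 * swp_bitmap tracks one bit per segment (1 == in use) and swp_csegs keeps
 * a back pointer per segment so reclaim can find the owning c_segment.
 * A sketch of the bit arithmetic used throughout this file:
 *
 *	byte  = segidx >> 3;                       // which byte of the bitmap
 *	bit   = segidx % 8;                        // which bit within that byte
 *	inuse = swp_bitmap[byte] & (1 << bit);     // nonzero if slot is taken
 */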
104
105 queue_head_t swf_global_queue;
106 boolean_t swp_trim_supported = FALSE;
107
108 extern clock_sec_t dont_trim_until_ts;
109 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
110 clock_sec_t vm_swapfile_last_successful_create_ts = 0;
111 int vm_swapfile_can_be_created = FALSE;
112 boolean_t delayed_trim_handling_in_progress = FALSE;
113
114 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
115
116 static void vm_swapout_thread_throttle_adjust(void);
117 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
118 static void vm_swapout_thread(void);
119 static void vm_swapfile_create_thread(void);
120 static void vm_swapfile_gc_thread(void);
121 static void vm_swap_defragment(void);
122 static void vm_swap_handle_delayed_trims(boolean_t);
123 static void vm_swap_do_delayed_trim(struct swapfile *);
124 static void vm_swap_wait_on_trim_handling_in_progress(void);
125
126 extern int vnode_getwithref(struct vnode* vp);
127
128 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
129
130 #if CONFIG_EMBEDDED
131
132 /*
133 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
134 * number of swapfiles allowed. That increases wired memory overhead.
135 * So we keep the maximum number of swapfiles the same on both DEV and
136 * RELEASE builds so that the memory overhead is similar for performance comparisons.
137 */
138 #define VM_MAX_SWAP_FILE_NUM 5
139
140 #define VM_SWAPFILE_DELAYED_TRIM_MAX 4
141
142 #define VM_SWAP_SHOULD_DEFRAGMENT() (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
143 #define VM_SWAP_SHOULD_PIN(_size) FALSE
144 #define VM_SWAP_SHOULD_CREATE(cur_ts) ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
145 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
146 #define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
147
148 #else /* CONFIG_EMBEDDED */
149
150 #define VM_MAX_SWAP_FILE_NUM 100
151 #define VM_SWAPFILE_DELAYED_TRIM_MAX 128
152
153 #define VM_SWAP_SHOULD_DEFRAGMENT() (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
154 #define VM_SWAP_SHOULD_PIN(_size) (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
155 #define VM_SWAP_SHOULD_CREATE(cur_ts) ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
156 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
157 #define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
158
159 #endif /* CONFIG_EMBEDDED */
160
161 #define VM_SWAP_SHOULD_RECLAIM() (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
162 #define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)
163 #define VM_SWAPFILE_DELAYED_CREATE 15
164
165 #define VM_SWAP_BUSY() ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
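/*
 * Policy note (illustrative, not part of the original source): defragmentation
 * kicks in once the count of sparsely-populated swapped-out segments crosses a
 * fraction of all in-use swap segments -- 1/16 on embedded, 1/4 on macOS. For
 * example, with 1024 segments in use on macOS, more than 256 sparse segments
 * triggers VM_SWAP_SHOULD_DEFRAGMENT(). Reclaim has hysteresis: it starts when
 * the free-but-allocated surplus reaches SWAPFILE_RECLAIM_THRESHOLD_SEGS and
 * aborts once that surplus falls to SWAPFILE_RECLAIM_MINIMUM_SEGS.
 */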
166
167
168 #if CHECKSUM_THE_SWAP
169 extern unsigned int hash_string(char *cp, int len);
170 #endif
171
172 #if RECORD_THE_COMPRESSED_DATA
173 boolean_t c_compressed_record_init_done = FALSE;
174 int c_compressed_record_write_error = 0;
175 struct vnode *c_compressed_record_vp = NULL;
176 uint64_t c_compressed_record_file_offset = 0;
177 void c_compressed_record_init(void);
178 void c_compressed_record_write(char *, int);
179 #endif
180
181 extern void vm_pageout_io_throttle(void);
182
183 static struct swapfile *vm_swapfile_for_handle(uint64_t);
184
185 /*
186 * Called with the vm_swap_data_lock held.
187 */
188
189 static struct swapfile *
190 vm_swapfile_for_handle(uint64_t f_offset)
191 {
192 uint64_t file_offset = 0;
193 unsigned int swapfile_index = 0;
194 struct swapfile* swf = NULL;
195
196 file_offset = (f_offset & SWAP_SLOT_MASK);
197 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
198
199 swf = (struct swapfile*) queue_first(&swf_global_queue);
200
201 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
202 if (swapfile_index == swf->swp_index) {
203 break;
204 }
205
206 swf = (struct swapfile*) queue_next(&swf->swp_queue);
207 }
208
209 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
210 swf = NULL;
211 }
212
213 return swf;
214 }
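/*
 * Illustrative sketch (not part of the original source) of the swap handle
 * layout this lookup decodes: the swapfile index lives in the high bits and
 * the byte offset within that file in the low bits.
 *
 *	handle = ((uint64_t)swp_index << SWAP_DEVICE_SHIFT) | (offset & SWAP_SLOT_MASK);
 *	index  = handle >> SWAP_DEVICE_SHIFT;
 *	offset = handle &  SWAP_SLOT_MASK;
 *
 * vm_swap_put() composes handles this way; see "issue_io" below.
 */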
215
216 #if ENCRYPTED_SWAP
217
218 #include <libkern/crypto/aesxts.h>
219
220 extern int cc_rand_generate(void *, size_t); /* from <libkern/crypto/rand.h> */
221
222 boolean_t swap_crypt_initialized;
223 void swap_crypt_initialize(void);
224
225 symmetric_xts xts_modectx;
226 uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
227 uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
228
229 #if DEVELOPMENT || DEBUG
230 boolean_t swap_crypt_xts_tested = FALSE;
231 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
232 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
233 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
234 #endif /* DEVELOPMENT || DEBUG */
235
236 unsigned long vm_page_encrypt_counter;
237 unsigned long vm_page_decrypt_counter;
238
239
240 void
241 swap_crypt_initialize(void)
242 {
243 uint8_t *enckey1, *enckey2;
244 int keylen1, keylen2;
245 int error;
246
247 assert(swap_crypt_initialized == FALSE);
248
249 keylen1 = sizeof(swap_crypt_key1);
250 enckey1 = (uint8_t *)&swap_crypt_key1;
251 keylen2 = sizeof(swap_crypt_key2);
252 enckey2 = (uint8_t *)&swap_crypt_key2;
253
254 error = cc_rand_generate((void *)enckey1, keylen1);
255 assert(!error);
256
257 error = cc_rand_generate((void *)enckey2, keylen2);
258 assert(!error);
259
260 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
261 assert(!error);
262
263 swap_crypt_initialized = TRUE;
264
265 #if DEVELOPMENT || DEBUG
266 uint8_t *encptr;
267 uint8_t *decptr;
268 uint8_t *refptr;
269 uint8_t *iv;
270 uint64_t ivnum[2];
271 int size = 0;
272 int i = 0;
273 int rc = 0;
274
275 assert(swap_crypt_xts_tested == FALSE);
276
277 /*
278 * Validate the encryption algorithms.
279 *
280 * First initialize the test data.
281 */
282 for (i = 0; i < 4096; i++) {
283 swap_crypt_test_page_ref[i] = (char) i;
284 }
285 ivnum[0] = (uint64_t)0xaa;
286 ivnum[1] = 0;
287 iv = (uint8_t *)ivnum;
288
289 refptr = (uint8_t *)swap_crypt_test_page_ref;
290 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
291 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
292 size = 4096;
293
294 /* encrypt */
295 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
296 assert(!rc);
297
298 /* compare result with original - should NOT match */
299 for (i = 0; i < 4096; i++) {
300 if (swap_crypt_test_page_encrypt[i] !=
301 swap_crypt_test_page_ref[i]) {
302 break;
303 }
304 }
305 assert(i != 4096);
306
307 /* decrypt */
308 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
309 assert(!rc);
310
311 /* compare result with original */
312 for (i = 0; i < 4096; i++) {
313 if (swap_crypt_test_page_decrypt[i] !=
314 swap_crypt_test_page_ref[i]) {
315 panic("encryption test failed");
316 }
317 }
318 /* encrypt in place */
319 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
320 assert(!rc);
321
322 /* decrypt in place */
323 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
324 assert(!rc);
325
326 for (i = 0; i < 4096; i++) {
327 if (swap_crypt_test_page_decrypt[i] !=
328 swap_crypt_test_page_ref[i]) {
329 panic("in place encryption test failed");
330 }
331 }
332 swap_crypt_xts_tested = TRUE;
333 #endif /* DEVELOPMENT || DEBUG */
334 }
335
336
337 void
338 vm_swap_encrypt(c_segment_t c_seg)
339 {
340 uint8_t *ptr;
341 uint8_t *iv;
342 uint64_t ivnum[2];
343 int size = 0;
344 int rc = 0;
345
346 if (swap_crypt_initialized == FALSE) {
347 swap_crypt_initialize();
348 }
349
350 #if DEVELOPMENT || DEBUG
351 C_SEG_MAKE_WRITEABLE(c_seg);
352 #endif
353 ptr = (uint8_t *)c_seg->c_store.c_buffer;
354 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
355
356 ivnum[0] = (uint64_t)c_seg;
357 ivnum[1] = 0;
358 iv = (uint8_t *)ivnum;
359
360 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
361 assert(!rc);
362
363 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
364
365 #if DEVELOPMENT || DEBUG
366 C_SEG_WRITE_PROTECT(c_seg);
367 #endif
368 }
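/*
 * Note (illustrative, not part of the original source): the XTS tweak/IV is
 * derived from the c_segment_t address (ivnum[0] = (uint64_t)c_seg), so
 * vm_swap_decrypt() recovers the plaintext only because the same c_segment
 * structure -- and therefore the same address -- is used for the matching
 * decrypt after swapin. The two 256-bit keys are freshly randomized on each
 * boot, so swapped segments cannot be decrypted after a reboot.
 */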
369
370 void
371 vm_swap_decrypt(c_segment_t c_seg)
372 {
373 uint8_t *ptr;
374 uint8_t *iv;
375 uint64_t ivnum[2];
376 int size = 0;
377 int rc = 0;
378
379 assert(swap_crypt_initialized);
380
381 #if DEVELOPMENT || DEBUG
382 C_SEG_MAKE_WRITEABLE(c_seg);
383 #endif
384 ptr = (uint8_t *)c_seg->c_store.c_buffer;
385 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
386
387 ivnum[0] = (uint64_t)c_seg;
388 ivnum[1] = 0;
389 iv = (uint8_t *)ivnum;
390
391 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
392 assert(!rc);
393
394 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
395
396 #if DEVELOPMENT || DEBUG
397 C_SEG_WRITE_PROTECT(c_seg);
398 #endif
399 }
400 #endif /* ENCRYPTED_SWAP */
401
402
403 void
404 vm_compressor_swap_init()
405 {
406 thread_t thread = NULL;
407
408 queue_init(&swf_global_queue);
409
410 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
411 BASEPRI_VM, &thread) != KERN_SUCCESS) {
412 panic("vm_swapout_thread: create failed");
413 }
414 thread_set_thread_name(thread, "VM_swapout");
415 vm_swapout_thread_id = thread->thread_id;
416
417 thread_deallocate(thread);
418
419 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
420 BASEPRI_VM, &thread) != KERN_SUCCESS) {
421 panic("vm_swapfile_create_thread: create failed");
422 }
423
424 thread_set_thread_name(thread, "VM_swapfile_create");
425 thread_deallocate(thread);
426
427 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
428 BASEPRI_VM, &thread) != KERN_SUCCESS) {
429 panic("vm_swapfile_gc_thread: create failed");
430 }
431 thread_set_thread_name(thread, "VM_swapfile_gc");
432
433 /*
434 * Swapfile garbage collection will need to allocate memory
435 * to complete its swap reclaim and in-memory compaction.
436 * So allow it to dip into the reserved VM page pool.
437 */
438 thread_lock(thread);
439 thread->options |= TH_OPT_VMPRIV;
440 thread_unlock(thread);
441
442 thread_deallocate(thread);
443
444 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
445 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
446 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
447 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
448
449 #if CONFIG_EMBEDDED
450 /*
451 * dummy value until the swap file gets created
452 * when we drive the first c_segment_t to the
453 * swapout queue... at that time we will
454 * know the true size we have to work with
455 */
456 c_overage_swapped_limit = 16;
457 #endif
458
459 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
460
461 printf("VM Swap Subsystem is ON\n");
462 }
463
464
465 #if RECORD_THE_COMPRESSED_DATA
466
467 void
468 c_compressed_record_init()
469 {
470 if (c_compressed_record_init_done == FALSE) {
471 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
472 c_compressed_record_init_done = TRUE;
473 }
474 }
475
476 void
477 c_compressed_record_write(char *buf, int size)
478 {
479 if (c_compressed_record_write_error == 0) {
480 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
481 c_compressed_record_file_offset += size;
482 }
483 }
484 #endif
485
486
487 int compaction_swapper_inited = 0;
488
489 void
490 vm_compaction_swapper_do_init(void)
491 {
492 struct vnode *vp;
493 char *pathname;
494 int namelen;
495
496 if (compaction_swapper_inited) {
497 return;
498 }
499
500 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
501 compaction_swapper_inited = 1;
502 return;
503 }
504 lck_mtx_lock(&vm_swap_data_lock);
505
506 if (!compaction_swapper_inited) {
507 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
508 pathname = kheap_alloc(KHEAP_TEMP, namelen, Z_WAITOK | Z_ZERO);
509 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
510
511 vm_swapfile_open(pathname, &vp);
512
513 if (vp) {
514 if (vnode_pager_isSSD(vp) == FALSE) {
515 /*
516 * swap files live on an HDD, so let's make sure to start swapping
517 * much earlier since we're not worried about SSD write-wear and
518 * we have so little write bandwidth to work with.
519 * These values were derived experimentally by running the performance
520 * team's stock test for evaluating HDD performance against various
521 * combinations and comparing overall results.
522 * Note that the > relationship between these 4 values must be maintained.
523 */
524 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
525 vm_compressor_minorcompact_threshold_divisor = 15;
526 }
527 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
528 vm_compressor_majorcompact_threshold_divisor = 18;
529 }
530 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
531 vm_compressor_unthrottle_threshold_divisor = 24;
532 }
533 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
534 vm_compressor_catchup_threshold_divisor = 30;
535 }
536 }
537 #if !CONFIG_EMBEDDED
538 vnode_setswapmount(vp);
539 vm_swappin_avail = vnode_getswappin_avail(vp);
540
541 if (vm_swappin_avail) {
542 vm_swappin_enabled = TRUE;
543 }
544 #endif
545 vm_swapfile_close((uint64_t)pathname, vp);
546 }
547 kheap_free(KHEAP_TEMP, pathname, namelen);
548
549 compaction_swapper_inited = 1;
550 }
551 lck_mtx_unlock(&vm_swap_data_lock);
552 }
553
554
555 void
556 vm_swap_consider_defragmenting(int flags)
557 {
558 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
559 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
560
561 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
562 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
563 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
564 lck_mtx_lock(&vm_swap_data_lock);
565
566 if (force_defrag) {
567 vm_swap_force_defrag = TRUE;
568 }
569
570 if (force_reclaim) {
571 vm_swap_force_reclaim = TRUE;
572 }
573
574 if (!vm_swapfile_gc_thread_running) {
575 thread_wakeup((event_t) &vm_swapfile_gc_needed);
576 }
577
578 lck_mtx_unlock(&vm_swap_data_lock);
579 }
580 }
581 }
582
583
584 int vm_swap_defragment_yielded = 0;
585 int vm_swap_defragment_swapin = 0;
586 int vm_swap_defragment_free = 0;
587 int vm_swap_defragment_busy = 0;
588
589 #if CONFIG_FREEZE
590 extern uint32_t c_segment_pages_compressed_incore;
591 extern uint32_t c_segment_pages_compressed_nearing_limit;
592 extern uint32_t c_segment_count;
593 extern uint32_t c_segments_nearing_limit;
594
595 boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
596
597 extern bool freezer_incore_cseg_acct;
598 #endif /* CONFIG_FREEZE */
599
600 static void
601 vm_swap_defragment()
602 {
603 c_segment_t c_seg;
604
605 /*
606 * have to grab the master lock w/o holding
607 * any locks in spin mode
608 */
609 PAGE_REPLACEMENT_DISALLOWED(TRUE);
610
611 lck_mtx_lock_spin_always(c_list_lock);
612
613 while (!queue_empty(&c_swappedout_sparse_list_head)) {
614 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
615 vm_swap_defragment_yielded++;
616 break;
617 }
618 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
619
620 lck_mtx_lock_spin_always(&c_seg->c_lock);
621
622 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
623
624 if (c_seg->c_busy) {
625 lck_mtx_unlock_always(c_list_lock);
626
627 PAGE_REPLACEMENT_DISALLOWED(FALSE);
628 /*
629 * c_seg_wait_on_busy consumes c_seg->c_lock
630 */
631 c_seg_wait_on_busy(c_seg);
632
633 PAGE_REPLACEMENT_DISALLOWED(TRUE);
634
635 lck_mtx_lock_spin_always(c_list_lock);
636
637 vm_swap_defragment_busy++;
638 continue;
639 }
640 if (c_seg->c_bytes_used == 0) {
641 /*
642 * c_seg_free_locked consumes the c_list_lock
643 * and c_seg->c_lock
644 */
645 C_SEG_BUSY(c_seg);
646 c_seg_free_locked(c_seg);
647
648 vm_swap_defragment_free++;
649 } else {
650 lck_mtx_unlock_always(c_list_lock);
651
652 #if CONFIG_FREEZE
653 if (freezer_incore_cseg_acct) {
654 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
655 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
656 }
657
658 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
659 if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
660 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
661 }
662 }
663 #endif /* CONFIG_FREEZE */
664 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
665 lck_mtx_unlock_always(&c_seg->c_lock);
666 }
667
668 vm_swap_defragment_swapin++;
669 }
670 PAGE_REPLACEMENT_DISALLOWED(FALSE);
671
672 vm_pageout_io_throttle();
673
674 /*
675 * because write waiters have priority over readers, dropping
676 * and immediately retaking the master lock still gives any
677 * thread waiting to take the master lock exclusively an
678 * opportunity to acquire it
679 */
680 PAGE_REPLACEMENT_DISALLOWED(TRUE);
681
682 lck_mtx_lock_spin_always(c_list_lock);
683 }
684 lck_mtx_unlock_always(c_list_lock);
685
686 PAGE_REPLACEMENT_DISALLOWED(FALSE);
687 }
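/*
 * Design note (illustrative, not part of the original source): "defragmenting"
 * here means swapping sparse segments back in (or freeing empty ones), not
 * rewriting them on disk. Once resident, a sparse segment can be minor-compacted
 * and later swapped back out densely packed, which releases whole
 * COMPRESSED_SWAP_CHUNK_SIZE slots and lets vm_swap_reclaim() shrink or
 * retire swapfiles.
 */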
688
689
690
691 static void
692 vm_swapfile_create_thread(void)
693 {
694 clock_sec_t sec;
695 clock_nsec_t nsec;
696
697 current_thread()->options |= TH_OPT_VMPRIV;
698
699 vm_swapfile_create_thread_awakened++;
700 vm_swapfile_create_thread_running = 1;
701
702 while (TRUE) {
703 /*
704 * walk through the list of swap files
705 * and do the delayed frees/trims for
706 * any swap file whose count of delayed
707 * frees is above the batch limit
708 */
709 vm_swap_handle_delayed_trims(FALSE);
710
711 lck_mtx_lock(&vm_swap_data_lock);
712
713 if (hibernate_in_progress_with_pinned_swap == TRUE) {
714 break;
715 }
716
717 if (compressor_store_stop_compaction == TRUE) {
718 break;
719 }
720
721 clock_get_system_nanotime(&sec, &nsec);
722
723 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
724 break;
725 }
726
727 lck_mtx_unlock(&vm_swap_data_lock);
728
729 if (vm_swap_create_file() == FALSE) {
730 vm_swapfile_last_failed_to_create_ts = sec;
731 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
732 } else {
733 vm_swapfile_last_successful_create_ts = sec;
734 }
735 }
736 vm_swapfile_create_thread_running = 0;
737
738 if (hibernate_in_progress_with_pinned_swap == TRUE) {
739 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
740 }
741
742 if (compressor_store_stop_compaction == TRUE) {
743 thread_wakeup((event_t)&compressor_store_stop_compaction);
744 }
745
746 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
747
748 lck_mtx_unlock(&vm_swap_data_lock);
749
750 thread_block((thread_continue_t)vm_swapfile_create_thread);
751
752 /* NOTREACHED */
753 }
754
755
756 #if HIBERNATION
757
758 kern_return_t
759 hibernate_pin_swap(boolean_t start)
760 {
761 vm_compaction_swapper_do_init();
762
763 if (start == FALSE) {
764 lck_mtx_lock(&vm_swap_data_lock);
765 hibernate_in_progress_with_pinned_swap = FALSE;
766 lck_mtx_unlock(&vm_swap_data_lock);
767
768 return KERN_SUCCESS;
769 }
770 if (vm_swappin_enabled == FALSE) {
771 return KERN_SUCCESS;
772 }
773
774 lck_mtx_lock(&vm_swap_data_lock);
775
776 hibernate_in_progress_with_pinned_swap = TRUE;
777
778 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
779 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
780
781 lck_mtx_unlock(&vm_swap_data_lock);
782
783 thread_block(THREAD_CONTINUE_NULL);
784
785 lck_mtx_lock(&vm_swap_data_lock);
786 }
787 if (vm_num_swap_files > vm_num_pinned_swap_files) {
788 hibernate_in_progress_with_pinned_swap = FALSE;
789 lck_mtx_unlock(&vm_swap_data_lock);
790
791 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
792 vm_num_swap_files, vm_num_pinned_swap_files);
793 return KERN_FAILURE;
794 }
795 lck_mtx_unlock(&vm_swap_data_lock);
796
797 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
798 if (vm_swap_create_file() == FALSE) {
799 break;
800 }
801 }
802 return KERN_SUCCESS;
803 }
804 #endif
805
806 static void
807 vm_swapfile_gc_thread(void)
808 {
809 boolean_t need_defragment;
810 boolean_t need_reclaim;
811
812 vm_swapfile_gc_thread_awakened++;
813 vm_swapfile_gc_thread_running = 1;
814
815 while (TRUE) {
816 lck_mtx_lock(&vm_swap_data_lock);
817
818 if (hibernate_in_progress_with_pinned_swap == TRUE) {
819 break;
820 }
821
822 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
823 break;
824 }
825
826 need_defragment = FALSE;
827 need_reclaim = FALSE;
828
829 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
830 need_defragment = TRUE;
831 }
832
833 if (VM_SWAP_SHOULD_RECLAIM()) {
834 need_defragment = TRUE;
835 need_reclaim = TRUE;
836 }
837 if (need_defragment == FALSE && need_reclaim == FALSE) {
838 break;
839 }
840
841 vm_swap_force_defrag = FALSE;
842 vm_swap_force_reclaim = FALSE;
843
844 lck_mtx_unlock(&vm_swap_data_lock);
845
846 if (need_defragment == TRUE) {
847 vm_swap_defragment();
848 }
849 if (need_reclaim == TRUE) {
850 vm_swap_reclaim();
851 }
852 }
853 vm_swapfile_gc_thread_running = 0;
854
855 if (hibernate_in_progress_with_pinned_swap == TRUE) {
856 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
857 }
858
859 if (compressor_store_stop_compaction == TRUE) {
860 thread_wakeup((event_t)&compressor_store_stop_compaction);
861 }
862
863 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
864
865 lck_mtx_unlock(&vm_swap_data_lock);
866
867 thread_block((thread_continue_t)vm_swapfile_gc_thread);
868
869 /* NOTREACHED */
870 }
871
872
873
874 #define VM_SWAPOUT_LIMIT_T2P 4
875 #define VM_SWAPOUT_LIMIT_T1P 4
876 #define VM_SWAPOUT_LIMIT_T0P 6
877 #define VM_SWAPOUT_LIMIT_T0 8
878 #define VM_SWAPOUT_LIMIT_MAX 8
879
880 #define VM_SWAPOUT_START 0
881 #define VM_SWAPOUT_T2_PASSIVE 1
882 #define VM_SWAPOUT_T1_PASSIVE 2
883 #define VM_SWAPOUT_T0_PASSIVE 3
884 #define VM_SWAPOUT_T0 4
885
886 int vm_swapout_state = VM_SWAPOUT_START;
887 int vm_swapout_limit = 1;
888
889 int vm_swapper_entered_T0 = 0;
890 int vm_swapper_entered_T0P = 0;
891 int vm_swapper_entered_T1P = 0;
892 int vm_swapper_entered_T2P = 0;
893
894
895 static void
896 vm_swapout_thread_throttle_adjust(void)
897 {
898 switch (vm_swapout_state) {
899 case VM_SWAPOUT_START:
900
901 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
902 vm_swapper_entered_T2P++;
903
904 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
905 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
906 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
907 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
908 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
909 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
910
911 break;
912
913 case VM_SWAPOUT_T2_PASSIVE:
914
915 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
916 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
917 vm_swapper_entered_T0P++;
918
919 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
920 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
921 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
922 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
923 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
924 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
925
926 break;
927 }
928 if (swapout_target_age || hibernate_flushing == TRUE) {
929 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
930 vm_swapper_entered_T1P++;
931
932 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
933 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
934 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
935 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
936 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
937 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
938 }
939 break;
940
941 case VM_SWAPOUT_T1_PASSIVE:
942
943 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
944 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
945 vm_swapper_entered_T0P++;
946
947 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
948 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
949 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
950 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
951 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
952 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
953
954 break;
955 }
956 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
957 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
958 vm_swapper_entered_T2P++;
959
960 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
961 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
962 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
963 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
964 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
965 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
966 }
967 break;
968
969 case VM_SWAPOUT_T0_PASSIVE:
970
971 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
972 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
973 vm_swapper_entered_T2P++;
974
975 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
976 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
977 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
978 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
979 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
980 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
981
982 break;
983 }
984 if (SWAPPER_NEEDS_TO_CATCHUP()) {
985 vm_swapper_entered_T0++;
986
987 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
988 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
989 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
990 vm_swapout_state = VM_SWAPOUT_T0;
991 }
992 break;
993
994 case VM_SWAPOUT_T0:
995
996 if (SWAPPER_HAS_CAUGHTUP()) {
997 vm_swapper_entered_T0P++;
998
999 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1000 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1001 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1002 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1003 }
1004 break;
1005 }
1006 }
1007
1008 int vm_swapout_found_empty = 0;
1009
1010 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1011
1012 int vm_swapout_soc_busy = 0;
1013 int vm_swapout_soc_done = 0;
1014
1015
1016 static struct swapout_io_completion *
1017 vm_swapout_find_free_soc(void)
1018 {
1019 int i;
1020
1021 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1022 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1023 return &vm_swapout_ctx[i];
1024 }
1025 }
1026 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1027
1028 return NULL;
1029 }
1030
1031 static struct swapout_io_completion *
1032 vm_swapout_find_done_soc(void)
1033 {
1034 int i;
1035
1036 if (vm_swapout_soc_done) {
1037 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1038 if (vm_swapout_ctx[i].swp_io_done) {
1039 return &vm_swapout_ctx[i];
1040 }
1041 }
1042 }
1043 return NULL;
1044 }
1045
1046 static void
1047 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1048 {
1049 kern_return_t kr;
1050
1051 if (soc->swp_io_error) {
1052 kr = KERN_FAILURE;
1053 } else {
1054 kr = KERN_SUCCESS;
1055 }
1056
1057 lck_mtx_unlock_always(c_list_lock);
1058
1059 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1060 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1061
1062 lck_mtx_lock_spin_always(c_list_lock);
1063
1064 soc->swp_io_done = 0;
1065 soc->swp_io_busy = 0;
1066
1067 vm_swapout_soc_busy--;
1068 vm_swapout_soc_done--;
1069 }
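/*
 * Flow note (illustrative, not part of the original source): swapouts are
 * asynchronous. vm_swapout_thread() claims a free swapout_io_completion
 * (soc), issues the write via vm_swap_put(), and marks it swp_io_busy.
 * vm_swapout_iodone() runs at I/O completion, sets swp_io_done, and wakes
 * the thread, which calls vm_swapout_complete_soc() to finish the segment
 * state transition and recycle the slot. Up to VM_SWAPOUT_LIMIT_MAX (8)
 * I/Os can be in flight at once.
 */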
1070
1071
1072 static void
1073 vm_swapout_thread(void)
1074 {
1075 uint32_t size = 0;
1076 c_segment_t c_seg = NULL;
1077 kern_return_t kr = KERN_SUCCESS;
1078 struct swapout_io_completion *soc;
1079
1080 current_thread()->options |= TH_OPT_VMPRIV;
1081
1082 vm_swapout_thread_awakened++;
1083
1084 lck_mtx_lock_spin_always(c_list_lock);
1085
1086 vm_swapout_thread_running = TRUE;
1087 again:
1088 while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit && !compressor_store_stop_compaction) {
1089 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
1090
1091 lck_mtx_lock_spin_always(&c_seg->c_lock);
1092
1093 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1094
1095 if (c_seg->c_busy) {
1096 lck_mtx_unlock_always(c_list_lock);
1097
1098 c_seg_wait_on_busy(c_seg);
1099
1100 lck_mtx_lock_spin_always(c_list_lock);
1101
1102 continue;
1103 }
1104 vm_swapout_thread_processed_segments++;
1105
1106 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1107
1108 if (size == 0) {
1109 assert(c_seg->c_bytes_used == 0);
1110
1111 if (!c_seg->c_on_minorcompact_q) {
1112 c_seg_need_delayed_compaction(c_seg, TRUE);
1113 }
1114
1115 c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
1116 lck_mtx_unlock_always(&c_seg->c_lock);
1117 lck_mtx_unlock_always(c_list_lock);
1118
1119 vm_swapout_found_empty++;
1120 goto c_seg_is_empty;
1121 }
1122 C_SEG_BUSY(c_seg);
1123 c_seg->c_busy_swapping = 1;
1124
1125 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1126
1127 lck_mtx_unlock_always(c_list_lock);
1128 lck_mtx_unlock_always(&c_seg->c_lock);
1129
1130 #if CHECKSUM_THE_SWAP
1131 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1132 c_seg->cseg_swap_size = size;
1133 #endif /* CHECKSUM_THE_SWAP */
1134
1135 #if ENCRYPTED_SWAP
1136 vm_swap_encrypt(c_seg);
1137 #endif /* ENCRYPTED_SWAP */
1138
1139 soc = vm_swapout_find_free_soc();
1140 assert(soc);
1141
1142 soc->swp_upl_ctx.io_context = (void *)soc;
1143 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1144 soc->swp_upl_ctx.io_error = 0;
1145
1146 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1147
1148 if (kr != KERN_SUCCESS) {
1149 if (soc->swp_io_done) {
1150 lck_mtx_lock_spin_always(c_list_lock);
1151
1152 soc->swp_io_done = 0;
1153 vm_swapout_soc_done--;
1154
1155 lck_mtx_unlock_always(c_list_lock);
1156 }
1157 vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1158 } else {
1159 soc->swp_io_busy = 1;
1160 vm_swapout_soc_busy++;
1161 }
1162
1163 c_seg_is_empty:
1164 if (c_swapout_count == 0) {
1165 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1166 }
1167
1168 lck_mtx_lock_spin_always(c_list_lock);
1169
1170 while ((soc = vm_swapout_find_done_soc())) {
1171 vm_swapout_complete_soc(soc);
1172 }
1173 lck_mtx_unlock_always(c_list_lock);
1174
1175 vm_swapout_thread_throttle_adjust();
1176
1177 lck_mtx_lock_spin_always(c_list_lock);
1178 }
1179 while ((soc = vm_swapout_find_done_soc())) {
1180 vm_swapout_complete_soc(soc);
1181 }
1182 lck_mtx_unlock_always(c_list_lock);
1183
1184 vm_pageout_io_throttle();
1185
1186 lck_mtx_lock_spin_always(c_list_lock);
1187
1188 /*
1189 * Recheck if we have some c_segs to wakeup
1190 * post throttle. And, check to see if we
1191 * have any more swapouts needed.
1192 */
1193 if (vm_swapout_soc_done) {
1194 goto again;
1195 }
1196
1197 assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
1198
1199 vm_swapout_thread_running = FALSE;
1200
1201 lck_mtx_unlock_always(c_list_lock);
1202
1203 thread_block((thread_continue_t)vm_swapout_thread);
1204
1205 /* NOTREACHED */
1206 }
1207
1208
1209 void
1210 vm_swapout_iodone(void *io_context, int error)
1211 {
1212 struct swapout_io_completion *soc;
1213
1214 soc = (struct swapout_io_completion *)io_context;
1215
1216 lck_mtx_lock_spin_always(c_list_lock);
1217
1218 soc->swp_io_done = 1;
1219 soc->swp_io_error = error;
1220 vm_swapout_soc_done++;
1221
1222 if (!vm_swapout_thread_running) {
1223 thread_wakeup((event_t)&c_swapout_list_head);
1224 }
1225
1226 lck_mtx_unlock_always(c_list_lock);
1227 }
1228
1229
1230 static void
1231 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1232 {
1233 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1234
1235 if (kr == KERN_SUCCESS) {
1236 kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size,
1237 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1238 }
1239 #if ENCRYPTED_SWAP
1240 else {
1241 vm_swap_decrypt(c_seg);
1242 }
1243 #endif /* ENCRYPTED_SWAP */
1244 lck_mtx_lock_spin_always(c_list_lock);
1245 lck_mtx_lock_spin_always(&c_seg->c_lock);
1246
1247 if (kr == KERN_SUCCESS) {
1248 int new_state = C_ON_SWAPPEDOUT_Q;
1249 boolean_t insert_head = FALSE;
1250
1251 if (hibernate_flushing == TRUE) {
1252 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1253 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1254 insert_head = TRUE;
1255 }
1256 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1257 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1258 }
1259
1260 c_seg_switch_state(c_seg, new_state, insert_head);
1261
1262 c_seg->c_store.c_swap_handle = f_offset;
1263
1264 VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
1265
1266 if (c_seg->c_bytes_used) {
1267 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1268 }
1269
1270 #if CONFIG_FREEZE
1271 /*
1272 * Successful swapout. Decrement the in-core compressed pages count.
1273 */
1274 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1275 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1276 #endif /* CONFIG_FREEZE */
1277 } else {
1278 if (c_seg->c_overage_swap == TRUE) {
1279 c_seg->c_overage_swap = FALSE;
1280 c_overage_swapped_count--;
1281 }
1282
1283 #if CONFIG_FREEZE
1284 if (c_seg->c_task_owner) {
1285 c_seg_update_task_owner(c_seg, NULL);
1286 }
1287 #endif /* CONFIG_FREEZE */
1288
1289 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1290
1291 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1292 c_seg_need_delayed_compaction(c_seg, TRUE);
1293 }
1294 }
1295 assert(c_seg->c_busy_swapping);
1296 assert(c_seg->c_busy);
1297
1298 c_seg->c_busy_swapping = 0;
1299 lck_mtx_unlock_always(c_list_lock);
1300
1301 C_SEG_WAKEUP_DONE(c_seg);
1302 lck_mtx_unlock_always(&c_seg->c_lock);
1303
1304 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1305 }
1306
1307
1308 boolean_t
1309 vm_swap_create_file()
1310 {
1311 uint64_t size = 0;
1312 int namelen = 0;
1313 boolean_t swap_file_created = FALSE;
1314 boolean_t swap_file_reuse = FALSE;
1315 boolean_t swap_file_pin = FALSE;
1316 struct swapfile *swf = NULL;
1317
1318 /*
1319 * make sure we've got all the info we need
1320 * to potentially pin a swap file... we could
1321 * be swapping out due to hibernation w/o ever
1322 * having run vm_pageout_scan, which is normally
1323 * the trigger to do the init
1324 */
1325 vm_compaction_swapper_do_init();
1326
1327 /*
1328 * Any swapfile structure ready for re-use?
1329 */
1330
1331 lck_mtx_lock(&vm_swap_data_lock);
1332
1333 swf = (struct swapfile*) queue_first(&swf_global_queue);
1334
1335 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1336 if (swf->swp_flags == SWAP_REUSE) {
1337 swap_file_reuse = TRUE;
1338 break;
1339 }
1340 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1341 }
1342
1343 lck_mtx_unlock(&vm_swap_data_lock);
1344
1345 if (swap_file_reuse == FALSE) {
1346 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1347
1348 swf = kalloc_flags(sizeof *swf, Z_WAITOK | Z_ZERO);
1349 swf->swp_index = vm_num_swap_files + 1;
1350 swf->swp_pathlen = namelen;
1351 swf->swp_path = kheap_alloc(KHEAP_DATA_BUFFERS, swf->swp_pathlen,
1352 Z_WAITOK | Z_ZERO);
1353
1354 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1355 }
1356
1357 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1358
1359 if (swf->swp_vp == NULL) {
1360 if (swap_file_reuse == FALSE) {
1361 kheap_free(KHEAP_DATA_BUFFERS, swf->swp_path, swf->swp_pathlen);
1362 kfree(swf, sizeof *swf);
1363 }
1364 return FALSE;
1365 }
1366 vm_swapfile_can_be_created = TRUE;
1367
1368 size = MAX_SWAP_FILE_SIZE;
1369
1370 while (size >= MIN_SWAP_FILE_SIZE) {
1371 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1372
1373 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1374 int num_bytes_for_bitmap = 0;
1375
1376 swap_file_created = TRUE;
1377
1378 swf->swp_size = size;
1379 swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
1380 swf->swp_nseginuse = 0;
1381 swf->swp_free_hint = 0;
1382
1383 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1384 /*
1385 * Allocate a bitmap that describes the
1386 * number of segments held by this swapfile.
1387 */
1388 swf->swp_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
1389 num_bytes_for_bitmap, Z_WAITOK | Z_ZERO);
1390
1391 swf->swp_csegs = kalloc_flags(swf->swp_nsegs * sizeof(c_segment_t),
1392 Z_WAITOK | Z_ZERO);
1393
1394 /*
1395 * passing a NULL trim_list into vnode_trim_list
1396 * will return ENOTSUP if trim isn't supported
1397 * and 0 if it is
1398 */
1399 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1400 swp_trim_supported = TRUE;
1401 }
1402
1403 lck_mtx_lock(&vm_swap_data_lock);
1404
1405 swf->swp_flags = SWAP_READY;
1406
1407 if (swap_file_reuse == FALSE) {
1408 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1409 }
1410
1411 vm_num_swap_files++;
1412
1413 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1414
1415 if (swap_file_pin == TRUE) {
1416 vm_num_pinned_swap_files++;
1417 swf->swp_flags |= SWAP_PINNED;
1418 vm_swappin_avail -= swf->swp_size;
1419 }
1420
1421 lck_mtx_unlock(&vm_swap_data_lock);
1422
1423 thread_wakeup((event_t) &vm_num_swap_files);
1424 #if CONFIG_EMBEDDED
1425 if (vm_num_swap_files == 1) {
1426 c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;
1427
1428 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1429 c_overage_swapped_limit /= 2;
1430 }
1431 }
1432 #endif
1433 break;
1434 } else {
1435 size = size / 2;
1436 }
1437 }
1438 if (swap_file_created == FALSE) {
1439 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1440
1441 swf->swp_vp = NULL;
1442
1443 if (swap_file_reuse == FALSE) {
1444 kheap_free(KHEAP_DATA_BUFFERS, swf->swp_path, swf->swp_pathlen);
1445 kfree(swf, sizeof *swf);
1446 }
1447 }
1448 return swap_file_created;
1449 }
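/*
 * Sizing note (illustrative, not part of the original source): preallocation
 * backs off geometrically -- starting at MAX_SWAP_FILE_SIZE and halving until
 * it succeeds or drops below MIN_SWAP_FILE_SIZE -- so a nearly full root
 * filesystem still yields a usable, smaller swapfile rather than an outright
 * failure.
 */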
1450
1451 extern void vnode_put(struct vnode* vp);
1452 kern_return_t
1453 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1454 {
1455 struct swapfile *swf = NULL;
1456 uint64_t file_offset = 0;
1457 int retval = 0;
1458
1459 assert(c_seg->c_store.c_buffer);
1460
1461 lck_mtx_lock(&vm_swap_data_lock);
1462
1463 swf = vm_swapfile_for_handle(f_offset);
1464
1465 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1466 vm_swap_get_failures++;
1467 retval = 1;
1468 goto done;
1469 }
1470 swf->swp_io_count++;
1471
1472 lck_mtx_unlock(&vm_swap_data_lock);
1473
1474 #if DEVELOPMENT || DEBUG
1475 C_SEG_MAKE_WRITEABLE(c_seg);
1476 #endif
1477 file_offset = (f_offset & SWAP_SLOT_MASK);
1478
1479 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1480 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1481 } else {
1482 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1483 vnode_put(swf->swp_vp);
1484 }
1485
1486 #if DEVELOPMENT || DEBUG
1487 C_SEG_WRITE_PROTECT(c_seg);
1488 #endif
1489 if (retval == 0) {
1490 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
1491 } else {
1492 vm_swap_get_failures++;
1493 }
1494
1495 /*
1496 * Free this slot in the swap structure.
1497 */
1498 vm_swap_free(f_offset);
1499
1500 lck_mtx_lock(&vm_swap_data_lock);
1501 swf->swp_io_count--;
1502
1503 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1504 swf->swp_flags &= ~SWAP_WANTED;
1505 thread_wakeup((event_t) &swf->swp_flags);
1506 }
1507 done:
1508 lck_mtx_unlock(&vm_swap_data_lock);
1509
1510 if (retval == 0) {
1511 return KERN_SUCCESS;
1512 } else {
1513 return KERN_FAILURE;
1514 }
1515 }
1516
1517 kern_return_t
1518 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1519 {
1520 unsigned int segidx = 0;
1521 struct swapfile *swf = NULL;
1522 uint64_t file_offset = 0;
1523 uint64_t swapfile_index = 0;
1524 unsigned int byte_for_segidx = 0;
1525 unsigned int offset_within_byte = 0;
1526 boolean_t swf_eligible = FALSE;
1527 boolean_t waiting = FALSE;
1528 boolean_t retried = FALSE;
1529 int error = 0;
1530 clock_sec_t sec;
1531 clock_nsec_t nsec;
1532 void *upl_ctx = NULL;
1533 boolean_t drop_iocount = FALSE;
1534
1535 if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1536 return KERN_FAILURE;
1537 }
1538 retry:
1539 lck_mtx_lock(&vm_swap_data_lock);
1540
1541 swf = (struct swapfile*) queue_first(&swf_global_queue);
1542
1543 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1544 segidx = swf->swp_free_hint;
1545
1546 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1547
1548 if (swf_eligible) {
1549 while (segidx < swf->swp_nsegs) {
1550 byte_for_segidx = segidx >> 3;
1551 offset_within_byte = segidx % 8;
1552
1553 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1554 segidx++;
1555 continue;
1556 }
1557
1558 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1559
1560 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1561 swf->swp_nseginuse++;
1562 swf->swp_io_count++;
1563 swf->swp_csegs[segidx] = c_seg;
1564
1565 swapfile_index = swf->swp_index;
1566 vm_swapfile_total_segs_used++;
1567
1568 clock_get_system_nanotime(&sec, &nsec);
1569
1570 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1571 thread_wakeup((event_t) &vm_swapfile_create_needed);
1572 }
1573
1574 lck_mtx_unlock(&vm_swap_data_lock);
1575
1576 goto issue_io;
1577 }
1578 }
1579 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1580 }
1581 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1582
1583 /*
1584 * we've run out of swap segments, but may not
1585 * be in a position to immediately create a new swap
1586 * file if we've recently failed to create due to a lack
1587 * of free space in the root filesystem... we'll try
1588 * to kick that create off, but in any event we're going
1589 * to take a breather (up to 1 second) so that we're not caught in a tight
1590 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1591 * segments into swap files only to have them immediately put back
1592 * on the c_age queue due to vm_swap_put failing.
1593 *
1594 * if we're doing these puts due to a hibernation flush,
1595 * no need to block... setting hibernate_no_swapspace to TRUE
1596 * will cause "vm_compressor_compact_and_swap" to immediately abort
1597 */
1598 clock_get_system_nanotime(&sec, &nsec);
1599
1600 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1601 thread_wakeup((event_t) &vm_swapfile_create_needed);
1602 }
1603
1604 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1605 waiting = TRUE;
1606 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1607 } else {
1608 hibernate_no_swapspace = TRUE;
1609 }
1610
1611 lck_mtx_unlock(&vm_swap_data_lock);
1612
1613 if (waiting == TRUE) {
1614 thread_block(THREAD_CONTINUE_NULL);
1615
1616 if (retried == FALSE && hibernate_flushing == TRUE) {
1617 retried = TRUE;
1618 goto retry;
1619 }
1620 }
1621 vm_swap_put_failures_no_swap_file++;
1622
1623 return KERN_FAILURE;
1624
1625 issue_io:
1626 assert(c_seg->c_busy_swapping);
1627 assert(c_seg->c_busy);
1628 assert(!c_seg->c_on_minorcompact_q);
1629
1630 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1631
1632 if (soc) {
1633 soc->swp_c_seg = c_seg;
1634 soc->swp_c_size = size;
1635
1636 soc->swp_swf = swf;
1637
1638 soc->swp_io_error = 0;
1639 soc->swp_io_done = 0;
1640
1641 upl_ctx = (void *)&soc->swp_upl_ctx;
1642 }
1643
1644 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1645 printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1646 } else {
1647 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1648 drop_iocount = TRUE;
1649 }
1650
1651 if (error || upl_ctx == NULL) {
1652 return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1653 }
1654
1655 return KERN_SUCCESS;
1656 }
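/*
 * Allocation note (illustrative, not part of the original source): the
 * free-slot search starts at swp_free_hint and scans the bitmap linearly.
 * The hint is only ever lowered (by the free paths below), so it is a floor
 * at or under the lowest free segment rather than an exact cursor.
 */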
1657
1658 kern_return_t
1659 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1660 {
1661 if (drop_iocount) {
1662 vnode_put(swf->swp_vp);
1663 }
1664
1665 lck_mtx_lock(&vm_swap_data_lock);
1666
1667 swf->swp_io_count--;
1668
1669 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1670 swf->swp_flags &= ~SWAP_WANTED;
1671 thread_wakeup((event_t) &swf->swp_flags);
1672 }
1673 lck_mtx_unlock(&vm_swap_data_lock);
1674
1675 if (error) {
1676 vm_swap_free(*f_offset);
1677 vm_swap_put_failures++;
1678
1679 return KERN_FAILURE;
1680 }
1681 return KERN_SUCCESS;
1682 }
1683
1684
1685 static void
1686 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1687 {
1688 uint64_t file_offset = 0;
1689 unsigned int segidx = 0;
1690
1691
1692 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1693 unsigned int byte_for_segidx = 0;
1694 unsigned int offset_within_byte = 0;
1695
1696 file_offset = (f_offset & SWAP_SLOT_MASK);
1697 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1698
1699 byte_for_segidx = segidx >> 3;
1700 offset_within_byte = segidx % 8;
1701
1702 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1703 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1704
1705 swf->swp_csegs[segidx] = NULL;
1706
1707 swf->swp_nseginuse--;
1708 vm_swapfile_total_segs_used--;
1709
1710 if (segidx < swf->swp_free_hint) {
1711 swf->swp_free_hint = segidx;
1712 }
1713 }
1714 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1715 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1716 }
1717 }
1718 }
1719
1720
1721 uint32_t vm_swap_free_now_count = 0;
1722 uint32_t vm_swap_free_delayed_count = 0;
1723
1724
1725 void
1726 vm_swap_free(uint64_t f_offset)
1727 {
1728 struct swapfile *swf = NULL;
1729 struct trim_list *tl = NULL;
1730 clock_sec_t sec;
1731 clock_nsec_t nsec;
1732
1733 if (swp_trim_supported == TRUE) {
1734 tl = kalloc(sizeof(struct trim_list));
1735 }
1736
1737 lck_mtx_lock(&vm_swap_data_lock);
1738
1739 swf = vm_swapfile_for_handle(f_offset);
1740
1741 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1742 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1743 /*
1744 * don't delay the free if the underlying disk doesn't support
1745 * trim, or we're in the midst of reclaiming this swap file since
1746 * we don't want to move segments that are technically free
1747 * but not yet handled by the delayed free mechanism
1748 */
1749 vm_swap_free_now(swf, f_offset);
1750
1751 vm_swap_free_now_count++;
1752 goto done;
1753 }
1754 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1755 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1756
1757 tl->tl_next = swf->swp_delayed_trim_list_head;
1758 swf->swp_delayed_trim_list_head = tl;
1759 swf->swp_delayed_trim_count++;
1760 tl = NULL;
1761
1762 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1763 clock_get_system_nanotime(&sec, &nsec);
1764
1765 if (sec > dont_trim_until_ts) {
1766 thread_wakeup((event_t) &vm_swapfile_create_needed);
1767 }
1768 }
1769 vm_swap_free_delayed_count++;
1770 }
1771 done:
1772 lck_mtx_unlock(&vm_swap_data_lock);
1773
1774 if (tl != NULL) {
1775 kfree(tl, sizeof(struct trim_list));
1776 }
1777 }
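/*
 * Batching note (illustrative, not part of the original source): on
 * TRIM-capable media a freed slot is not returned to the bitmap immediately;
 * it is queued on swp_delayed_trim_list_head until at least
 * VM_SWAPFILE_DELAYED_TRIM_MAX extents have accumulated, then the batch is
 * issued to the device in a single vnode_trim_list() call and the slots are
 * marked free (see vm_swap_do_delayed_trim).
 */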
1778
1779
1780 static void
1781 vm_swap_wait_on_trim_handling_in_progress()
1782 {
1783 while (delayed_trim_handling_in_progress == TRUE) {
1784 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1785 lck_mtx_unlock(&vm_swap_data_lock);
1786
1787 thread_block(THREAD_CONTINUE_NULL);
1788
1789 lck_mtx_lock(&vm_swap_data_lock);
1790 }
1791 }
1792
1793
1794 static void
1795 vm_swap_handle_delayed_trims(boolean_t force_now)
1796 {
1797 struct swapfile *swf = NULL;
1798
1799 /*
1800 * serialize the race between us and vm_swap_reclaim...
1801 * if vm_swap_reclaim wins it will turn off SWAP_READY
1802 * on the victim it has chosen... we can just skip over
1803 * that file since vm_swap_reclaim will first process
1804 * all of the delayed trims associated with it
1805 */
1806
1807 if (compressor_store_stop_compaction == TRUE) {
1808 return;
1809 }
1810
1811 lck_mtx_lock(&vm_swap_data_lock);
1812
1813 delayed_trim_handling_in_progress = TRUE;
1814
1815 lck_mtx_unlock(&vm_swap_data_lock);
1816
1817 /*
1818 * no need to hold the lock to walk the swf list since
1819 * vm_swap_create_file (the only place where we add to this list)
1820 * is run on the same thread as this function
1821 * and vm_swap_reclaim doesn't remove items from this list
1822 * instead marking them with SWAP_REUSE for future re-use
1823 */
1824 swf = (struct swapfile*) queue_first(&swf_global_queue);
1825
1826 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1827 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1828 assert(!(swf->swp_flags & SWAP_RECLAIM));
1829 vm_swap_do_delayed_trim(swf);
1830 }
1831 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1832 }
1833 lck_mtx_lock(&vm_swap_data_lock);
1834
1835 delayed_trim_handling_in_progress = FALSE;
1836 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1837
1838 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1839 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1840 }
1841
1842 lck_mtx_unlock(&vm_swap_data_lock);
1843 }
1844
1845 static void
1846 vm_swap_do_delayed_trim(struct swapfile *swf)
1847 {
1848 struct trim_list *tl, *tl_head;
1849 int error;
1850
1851 if (compressor_store_stop_compaction == TRUE) {
1852 return;
1853 }
1854
1855 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1856 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
1857 return;
1858 }
1859
1860 lck_mtx_lock(&vm_swap_data_lock);
1861
1862 tl_head = swf->swp_delayed_trim_list_head;
1863 swf->swp_delayed_trim_list_head = NULL;
1864 swf->swp_delayed_trim_count = 0;
1865
1866 lck_mtx_unlock(&vm_swap_data_lock);
1867
1868 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1869
1870 (void) vnode_put(swf->swp_vp);
1871
1872 while ((tl = tl_head) != NULL) {
1873 unsigned int segidx = 0;
1874 unsigned int byte_for_segidx = 0;
1875 unsigned int offset_within_byte = 0;
1876
1877 lck_mtx_lock(&vm_swap_data_lock);
1878
1879 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1880
1881 byte_for_segidx = segidx >> 3;
1882 offset_within_byte = segidx % 8;
1883
1884 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1885 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1886
1887 swf->swp_csegs[segidx] = NULL;
1888
1889 swf->swp_nseginuse--;
1890 vm_swapfile_total_segs_used--;
1891
1892 if (segidx < swf->swp_free_hint) {
1893 swf->swp_free_hint = segidx;
1894 }
1895 }
1896 lck_mtx_unlock(&vm_swap_data_lock);
1897
1898 tl_head = tl->tl_next;
1899
1900 kfree(tl, sizeof(struct trim_list));
1901 }
1902 }
1903
1904
1905 void
1906 vm_swap_flush()
1907 {
1908 return;
1909 }
1910
1911 int vm_swap_reclaim_yielded = 0;
1912
1913 void
1914 vm_swap_reclaim(void)
1915 {
1916 vm_offset_t addr = 0;
1917 unsigned int segidx = 0;
1918 uint64_t f_offset = 0;
1919 struct swapfile *swf = NULL;
1920 struct swapfile *smallest_swf = NULL;
1921 unsigned int min_nsegs = 0;
1922 unsigned int byte_for_segidx = 0;
1923 unsigned int offset_within_byte = 0;
1924 uint32_t c_size = 0;
1925
1926 c_segment_t c_seg = NULL;
1927
1928 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
1929 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1930 }
1931
1932 lck_mtx_lock(&vm_swap_data_lock);
1933
/*
 * if the delayed-trim handler is currently walking the
 * swapfile list looking for candidates, wait for it to
 * finish before deciding which swapfile to reclaim
 */
1940 vm_swap_wait_on_trim_handling_in_progress();
1941
/*
 * from here until we knock down the SWAP_READY bit, we must
 * keep holding the vm_swap_data_lock... once that bit has been
 * turned off, "vm_swap_handle_delayed_trims" will no longer
 * consider this swapfile for processing
 */
1948 swf = (struct swapfile*) queue_first(&swf_global_queue);
1949 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1950 smallest_swf = NULL;
1951
1952 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1953 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1954 smallest_swf = swf;
1955 min_nsegs = swf->swp_nseginuse;
1956 }
1957 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1958 }
1959
1960 if (smallest_swf == NULL) {
1961 goto done;
1962 }
1963
1964 swf = smallest_swf;
1965
1966
1967 swf->swp_flags &= ~SWAP_READY;
1968 swf->swp_flags |= SWAP_RECLAIM;
1969
1970 if (swf->swp_delayed_trim_count) {
1971 lck_mtx_unlock(&vm_swap_data_lock);
1972
1973 vm_swap_do_delayed_trim(swf);
1974
1975 lck_mtx_lock(&vm_swap_data_lock);
1976 }
1977 segidx = 0;
1978
1979 while (segidx < swf->swp_nsegs) {
1980 ReTry_for_cseg:
1981 /*
1982 * Wait for outgoing I/Os.
1983 */
1984 while (swf->swp_io_count) {
1985 swf->swp_flags |= SWAP_WANTED;
1986
1987 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1988 lck_mtx_unlock(&vm_swap_data_lock);
1989
1990 thread_block(THREAD_CONTINUE_NULL);
1991
1992 lck_mtx_lock(&vm_swap_data_lock);
1993 }
1994 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1995 vm_swap_reclaim_yielded++;
1996 break;
1997 }
1998
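/*
 * test this segment's bit in the in-use bitmap...
 * skip the segment if it holds no live data
 */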
1999 byte_for_segidx = segidx >> 3;
2000 offset_within_byte = segidx % 8;
2001
2002 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2003 segidx++;
2004 continue;
2005 }
2006
2007 c_seg = swf->swp_csegs[segidx];
2008 assert(c_seg);
2009
2010 lck_mtx_lock_spin_always(&c_seg->c_lock);
2011
2012 if (c_seg->c_busy) {
/*
 * a swapped-out c_segment in the process of being freed will remain in
 * the busy state until after vm_swap_free is called on it... vm_swap_free
 * takes the vm_swap_data_lock, so it can't change the swap state until
 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete,
 * which allows c_seg_free_locked to clear busy and wake up this thread...
 * at that point, we re-examine the swap state, which will now indicate
 * that this c_segment no longer exists.
 */
2022 c_seg->c_wanted = 1;
2023
2024 assert_wait((event_t) (c_seg), THREAD_UNINT);
2025 lck_mtx_unlock_always(&c_seg->c_lock);
2026
2027 lck_mtx_unlock(&vm_swap_data_lock);
2028
2029 thread_block(THREAD_CONTINUE_NULL);
2030
2031 lck_mtx_lock(&vm_swap_data_lock);
2032
2033 goto ReTry_for_cseg;
2034 }
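/*
 * the c_seg is now stable... while still holding the lock,
 * dissociate the segment from the swapfile (clear its in-use
 * bit, drop the c_seg pointer, adjust the counters), but
 * remember its on-disk offset so we can read the data back
 */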
2035 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2036
2037 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
2038
2039 assert(c_seg == swf->swp_csegs[segidx]);
2040 swf->swp_csegs[segidx] = NULL;
2041 swf->swp_nseginuse--;
2042
2043 vm_swapfile_total_segs_used--;
2044
2045 lck_mtx_unlock(&vm_swap_data_lock);
2046
2047 assert(C_SEG_IS_ONDISK(c_seg));
2048
2049 C_SEG_BUSY(c_seg);
2050 c_seg->c_busy_swapping = 1;
2051 #if !CHECKSUM_THE_SWAP
2052 c_seg_trim_tail(c_seg);
2053 #endif
2054 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2055
2056 assert(c_size <= C_SEG_BUFSIZE && c_size);
2057
2058 lck_mtx_unlock_always(&c_seg->c_lock);
2059
2060 if (vnode_getwithref(swf->swp_vp)) {
2061 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2062 vm_swap_get_failures++;
2063 goto swap_io_failed;
2064 } else {
2065 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
/*
 * reading the data back in failed, so convert c_seg
 * to a swapped-in c_segment that contains no data
 */
2070 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2071 /*
2072 * returns with c_busy_swapping cleared
2073 */
2074 vnode_put(swf->swp_vp);
2075 vm_swap_get_failures++;
2076 goto swap_io_failed;
2077 }
2078 vnode_put(swf->swp_vp);
2079 }
2080
2081 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
2082
2083 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2084 vm_offset_t c_buffer;
2085
/*
 * the put failed, so convert c_seg to a fully swapped-in
 * c_segment with valid data
 */
2090 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2091
2092 kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
2093
2094 memcpy((char *)c_buffer, (char *)addr, c_size);
2095
2096 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2097 #if ENCRYPTED_SWAP
2098 vm_swap_decrypt(c_seg);
2099 #endif /* ENCRYPTED_SWAP */
2100 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2101 /*
2102 * returns with c_busy_swapping cleared
2103 */
2104 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2105
2106 goto swap_io_failed;
2107 }
2108 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
2109
2110 lck_mtx_lock_spin_always(&c_seg->c_lock);
2111
2112 assert(C_SEG_IS_ONDISK(c_seg));
/*
 * record the c_seg's new location on disk.
 */
2116 c_seg->c_store.c_swap_handle = f_offset;
2117
2118 assert(c_seg->c_busy_swapping);
2119 c_seg->c_busy_swapping = 0;
2120 swap_io_failed:
2121 assert(c_seg->c_busy);
2122 C_SEG_WAKEUP_DONE(c_seg);
2123
2124 lck_mtx_unlock_always(&c_seg->c_lock);
2125 lck_mtx_lock(&vm_swap_data_lock);
2126 }
2127
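/*
 * segments are still in use, so we must have broken out of the
 * loop early (compaction stopped, reclaim aborted, or the
 * swapper was busy)... put the swapfile back in service
 */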
2128 if (swf->swp_nseginuse) {
2129 swf->swp_flags &= ~SWAP_RECLAIM;
2130 swf->swp_flags |= SWAP_READY;
2131
2132 goto done;
2133 }
2134 /*
2135 * We don't remove this inactive swf from the queue.
2136 * That way, we can re-use it when needed again and
2137 * preserve the namespace. The delayed_trim processing
2138 * is also dependent on us not removing swfs from the queue.
2139 */
2140 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2141
2142 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2143
2144 lck_mtx_unlock(&vm_swap_data_lock);
2145
2146 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2147
2148 kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
2149 kheap_free(KHEAP_DATA_BUFFERS, swf->swp_bitmap,
2150 MAX((swf->swp_nsegs >> 3), 1));
2151
2152 lck_mtx_lock(&vm_swap_data_lock);
2153
2154 if (swf->swp_flags & SWAP_PINNED) {
2155 vm_num_pinned_swap_files--;
2156 vm_swappin_avail += swf->swp_size;
2157 }
2158
2159 swf->swp_vp = NULL;
2160 swf->swp_size = 0;
2161 swf->swp_free_hint = 0;
2162 swf->swp_nsegs = 0;
2163 swf->swp_flags = SWAP_REUSE;
2164
2165 vm_num_swap_files--;
2166
2167 done:
2168 thread_wakeup((event_t) &swf->swp_flags);
2169 lck_mtx_unlock(&vm_swap_data_lock);
2170
2171 kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
2172 }
2173
2174
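/*
 * Space accounting: all values are in bytes, derived from the
 * global segment counters scaled by the segment size... for
 * example, with a hypothetical 1MB COMPRESSED_SWAP_CHUNK_SIZE,
 * 100 segments in use would report 100MB of used space.
 */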
2175 uint64_t
2176 vm_swap_get_total_space(void)
2177 {
2178 uint64_t total_space = 0;
2179
2180 total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
2181
2182 return total_space;
2183 }
2184
2185 uint64_t
2186 vm_swap_get_used_space(void)
2187 {
2188 uint64_t used_space = 0;
2189
2190 used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
2191
2192 return used_space;
2193 }
2194
2195 uint64_t
2196 vm_swap_get_free_space(void)
2197 {
2198 return vm_swap_get_total_space() - vm_swap_get_used_space();
2199 }
2200
2201 uint64_t
2202 vm_swap_get_max_configured_space(void)
2203 {
2204 int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2205 return num_swap_files * MAX_SWAP_FILE_SIZE;
2206 }
2207
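/*
 * Heuristic: report "low on space" once the number of free
 * segments falls below 1/8th of a swapfile's high-water mark
 * and the most recent swapfile-create attempt failed, i.e.
 * no relief can be expected from a new swapfile.
 */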
2208 int
2209 vm_swap_low_on_space(void)
2210 {
2211 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2212 return 0;
2213 }
2214
2215 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
2216 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2217 return 0;
2218 }
2219
2220 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2221 return 1;
2222 }
2223 }
2224 return 0;
2225 }
2226
2227 int
2228 vm_swap_out_of_space(void)
2229 {
2230 if ((vm_num_swap_files == vm_num_swap_files_config) &&
2231 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
/*
 * this is the last swapfile, and we only have room
 * left for the last few swapouts.
 */
2236 return 1;
2237 }
2238
2239 return 0;
2240 }
2241
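/*
 * with pinning disabled there is nothing to wait on, so we
 * trivially report TRUE... otherwise TRUE only once every
 * existing swapfile has been pinned
 */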
2242 boolean_t
2243 vm_swap_files_pinned(void)
2244 {
2245 boolean_t result;
2246
2247 if (vm_swappin_enabled == FALSE) {
2248 return TRUE;
2249 }
2250
2251 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2252
2253 return result;
2254 }
2255
2256 #if CONFIG_FREEZE
2257 boolean_t
2258 vm_swap_max_budget(uint64_t *freeze_daily_budget)
2259 {
2260 boolean_t use_device_value = FALSE;
2261 struct swapfile *swf = NULL;
2262
2263 if (vm_num_swap_files) {
2264 lck_mtx_lock(&vm_swap_data_lock);
2265
2266 swf = (struct swapfile*) queue_first(&swf_global_queue);
2267
2268 if (swf) {
2269 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
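/*
 * note the exact-match test below: only a swapfile whose
 * state is exactly SWAP_READY (no RECLAIM/PINNED/etc. bits
 * set) is used to query the volume's budget
 */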
2270 if (swf->swp_flags == SWAP_READY) {
2271 assert(swf->swp_vp);
2272
2273 if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
2274 use_device_value = TRUE;
2275 }
2276 break;
2277 }
2278 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2279 }
2280 }
2281
2282 lck_mtx_unlock(&vm_swap_data_lock);
2283 } else {
/*
 * No swap files exist yet, so this path computes the initial
 * budget value: open a temporary swap file just long enough
 * to query the volume's budget, then close it again.
 */
2288
2289 struct vnode *temp_vp = NULL;
2290
2291 vm_swapfile_open(swapfilename, &temp_vp);
2292
2293 if (temp_vp) {
2294 if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
2295 use_device_value = TRUE;
2296 }
2297
2298 vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
2299 temp_vp = NULL;
2300 } else {
2301 *freeze_daily_budget = 0;
2302 }
2303 }
2304
2305 return use_device_value;
2306 }
2307 #endif /* CONFIG_FREEZE */