osfmk/vm/vm_compressor_backing_store.c

   1 /*
   2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include "vm_compressor_backing_store.h"
  30 #include <vm/vm_protos.h>
  31
  32 #include <IOKit/IOHibernatePrivate.h>
  33
  34
  35 boolean_t       compressor_store_stop_compaction = FALSE;
  36 boolean_t       vm_swap_up = FALSE;
  37 boolean_t       vm_swapfile_create_needed = FALSE;
  38 boolean_t       vm_swapfile_gc_needed = FALSE;
  39
  40 int             swapper_throttle = -1;
  41 boolean_t       swapper_throttle_inited = FALSE;
  42 uint64_t        vm_swapout_thread_id;
  43
  44 uint64_t        vm_swap_put_failures = 0;
  45 uint64_t        vm_swap_get_failures = 0;
  46 int             vm_num_swap_files = 0;
  47 int             vm_swapout_thread_processed_segments = 0;
  48 int             vm_swapout_thread_awakened = 0;
  49 int             vm_swapfile_create_thread_awakened = 0;
  50 int             vm_swapfile_create_thread_running = 0;
  51 int             vm_swapfile_gc_thread_awakened = 0;
  52 int             vm_swapfile_gc_thread_running = 0;
  53
  54 int64_t         vm_swappin_avail = 0;
  55 unsigned int    vm_swapfile_total_segs_alloced = 0;
  56 unsigned int    vm_swapfile_total_segs_used = 0;
  57
  58
  59 #define SWAP_READY      0x1     /* Swap file is ready to be used */
  60 #define SWAP_RECLAIM    0x2     /* Swap file is marked to be reclaimed */
  61 #define SWAP_WANTED     0x4     /* Swap file has waiters */
  62 #define SWAP_REUSE      0x8     /* Swap file is on the Q and has a name. Reuse after init-ing.*/
  63 #define SWAP_PINNED     0x10    /* Swap file is pinned (FusionDrive) */
  64
  65
  66 struct swapfile{
  67         queue_head_t            swp_queue;      /* list of swap files */
  68         char                    *swp_path;      /* saved pathname of swap file */
  69         struct vnode            *swp_vp;        /* backing vnode */
  70         uint64_t                swp_size;       /* size of this swap file */
  71         uint8_t                 *swp_bitmap;    /* bitmap showing the alloced/freed slots in the swap file */
  72         unsigned int            swp_pathlen;    /* length of pathname */
  73         unsigned int            swp_nsegs;      /* #segments we can use */
  74         unsigned int            swp_nseginuse;  /* #segments in use */
  75         unsigned int            swp_index;      /* index of this swap file */
  76         unsigned int            swp_flags;      /* state of swap file */
  77         unsigned int            swp_free_hint;  /* offset of 1st free chunk */
  78         unsigned int            swp_io_count;   /* count of outstanding I/Os */
  79         c_segment_t             *swp_csegs;     /* back pointers to the c_segments. Used during swap reclaim. */
  80
  81         struct trim_list        *swp_delayed_trim_list_head;
  82         unsigned int            swp_delayed_trim_count;
  83 };
  84
  85 queue_head_t    swf_global_queue;
  86 boolean_t       swp_trim_supported = FALSE;
  87
  88 extern clock_sec_t      dont_trim_until_ts;
  89 clock_sec_t             vm_swapfile_last_failed_to_create_ts = 0;
  90 clock_sec_t             vm_swapfile_last_successful_create_ts = 0;
  91 int                     vm_swapfile_can_be_created = FALSE;
  92 boolean_t               delayed_trim_handling_in_progress = FALSE;
  93
  94 static void vm_swapout_thread_throttle_adjust(void);
  95 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
  96 static void vm_swapout_thread(void);
  97 static void vm_swapfile_create_thread(void);
  98 static void vm_swapfile_gc_thread(void);
  99 static void vm_swap_defragment();
 100 static void vm_swap_handle_delayed_trims(boolean_t);
 101 static void vm_swap_do_delayed_trim();
 102 static void vm_swap_wait_on_trim_handling_in_progress(void);
 103
 104
 105
 106 #define VM_MAX_SWAP_FILE_NUM            100
 107 #define VM_SWAPFILE_DELAYED_TRIM_MAX    128
 108
 109 #define VM_SWAP_SHOULD_DEFRAGMENT()     (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
 110 #define VM_SWAP_SHOULD_RECLAIM()        (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
 111 #define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
 112 #define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
 113 #define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
 114                                          ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
 115 #define VM_SWAP_SHOULD_TRIM(swf)        ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
 116
 117
 118 #define VM_SWAPFILE_DELAYED_CREATE      15
 119
 120 #define VM_SWAP_BUSY()  ((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
 121
 122
 123 #if CHECKSUM_THE_SWAP
 124 extern unsigned int hash_string(char *cp, int len);
 125 #endif
 126
 127 #if RECORD_THE_COMPRESSED_DATA
 128 boolean_t       c_compressed_record_init_done = FALSE;
 129 int             c_compressed_record_write_error = 0;
 130 struct vnode    *c_compressed_record_vp = NULL;
 131 uint64_t        c_compressed_record_file_offset = 0;
 132 void    c_compressed_record_init(void);
 133 void    c_compressed_record_write(char *, int);
 134 #endif
 135
 136 #if ENCRYPTED_SWAP
 137 extern boolean_t                swap_crypt_ctx_initialized;
 138 extern void                     swap_crypt_ctx_initialize(void);
 139 extern const unsigned char      swap_crypt_null_iv[AES_BLOCK_SIZE];
 140 extern aes_ctx                  swap_crypt_ctx;
 141 extern unsigned long            vm_page_encrypt_counter;
 142 extern unsigned long            vm_page_decrypt_counter;
 143 #endif /* ENCRYPTED_SWAP */
 144
 145 extern void                     vm_pageout_io_throttle(void);
 146 extern void                     vm_pageout_reinit_tuneables(void);
 147 extern void                     vm_swap_file_set_tuneables(void);
 148
 149 struct swapfile *vm_swapfile_for_handle(uint64_t);
 150
 151 /*
 152  * Called with the vm_swap_data_lock held.
 153  */
 154
 155 struct swapfile *
 156 vm_swapfile_for_handle(uint64_t f_offset)
 157 {
 158
 159         uint64_t                file_offset = 0;
 160         unsigned int            swapfile_index = 0;
 161         struct swapfile*        swf = NULL;
 162
 163         file_offset = (f_offset & SWAP_SLOT_MASK);
 164         swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
 165
 166         swf = (struct swapfile*) queue_first(&swf_global_queue);
 167
 168         while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
 169
 170                 if (swapfile_index == swf->swp_index) {
 171                         break;
 172                 }
 173
 174                 swf = (struct swapfile*) queue_next(&swf->swp_queue);
 175         }
 176
 177         if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
 178                 swf = NULL;
 179         }
 180
 181         return swf;
 182 }
 183
 184 void
 185 vm_compressor_swap_init()
 186 {
 187         thread_t        thread = NULL;
 188
 189         lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
 190         lck_grp_init(&vm_swap_data_lock_grp,
 191                      "vm_swap_data",
 192                      &vm_swap_data_lock_grp_attr);
 193         lck_attr_setdefault(&vm_swap_data_lock_attr);
 194         lck_mtx_init_ext(&vm_swap_data_lock,
 195                          &vm_swap_data_lock_ext,
 196                          &vm_swap_data_lock_grp,
 197                          &vm_swap_data_lock_attr);
 198
 199         queue_init(&swf_global_queue);
 200
 201
 202         if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
 203                                          BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
 204                 panic("vm_swapout_thread: create failed");
 205         }
 206         vm_swapout_thread_id = thread->thread_id;
 207
 208         thread_deallocate(thread);
 209
 210         if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
 211                                  BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
 212                 panic("vm_swapfile_create_thread: create failed");
 213         }
 214
 215         thread_deallocate(thread);
 216
 217         if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
 218                                  BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
 219                 panic("vm_swapfile_gc_thread: create failed");
 220         }
 221         thread_deallocate(thread);
 222
 223         proc_set_task_policy_thread(kernel_task, thread->thread_id,
 224                                     TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
 225         proc_set_task_policy_thread(kernel_task, thread->thread_id,
 226                                     TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
 227
 228 #if ENCRYPTED_SWAP
 229         if (swap_crypt_ctx_initialized == FALSE) {
 230                 swap_crypt_ctx_initialize();
 231         }
 232 #endif /* ENCRYPTED_SWAP */
 233
 234         memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);
 235
 236         vm_swap_up = TRUE;
 237
 238         printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF");
 239 }
 240
 241
 242 #if RECORD_THE_COMPRESSED_DATA
 243
 244 void
 245 c_compressed_record_init()
 246 {
 247         if (c_compressed_record_init_done == FALSE) {
 248                 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
 249                 c_compressed_record_init_done = TRUE;
 250         }
 251 }
 252
 253 void
 254 c_compressed_record_write(char *buf, int size)
 255 {
 256         if (c_compressed_record_write_error == 0) {
 257                 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
 258                 c_compressed_record_file_offset += size;
 259         }
 260 }
 261 #endif
 262
 263
 264
 265 void
 266 vm_swap_file_set_tuneables()
 267 {
 268         struct  vnode *vp;
 269         char    *pathname;
 270         int     namelen;
 271
 272         if (strlen(swapfilename) == 0) {
 273                 /*
 274                  * If no swapfile name has been set, we'll
 275                  * use the default name.
 276                  *
 277                  * Also, this function is only called from the vm_pageout_scan thread
 278                  * via vm_consider_waking_compactor_swapper,
 279                  * so we don't need to worry about a race in checking/setting the name here.
 280                  */
 281                 strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
 282         }
 283         namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
 284         pathname = (char*)kalloc(namelen);
 285         memset(pathname, 0, namelen);
 286         snprintf(pathname, namelen, "%s%d", swapfilename, 0);
 287
 288         vm_swapfile_open(pathname, &vp);
 289
 290         if (vp == NULL)
 291                 goto done;
 292
 293         if (vnode_pager_isSSD(vp) == FALSE)
 294                 vm_pageout_reinit_tuneables();
 295         vnode_setswapmount(vp);
 296         vm_swappin_avail = vnode_getswappin_avail(vp);
 297         vm_swapfile_close((uint64_t)pathname, vp);
 298 done:
 299         kfree(pathname, namelen);
 300 }
 301
 302
 303 #if ENCRYPTED_SWAP
 304 void
 305 vm_swap_encrypt(c_segment_t c_seg)
 306 {
 307         vm_offset_t     kernel_vaddr = 0;
 308         uint64_t        size = 0;
 309
 310         union {
 311                 unsigned char   aes_iv[AES_BLOCK_SIZE];
 312                 void            *c_seg;
 313         } encrypt_iv;
 314
 315         assert(swap_crypt_ctx_initialized);
 316
 317         bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
 318
 319         encrypt_iv.c_seg = (void*)c_seg;
 320
 321         /* encrypt the "initial vector" */
 322         aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
 323                         swap_crypt_null_iv,
 324                         1,
 325                         &encrypt_iv.aes_iv[0],
 326                         &swap_crypt_ctx.encrypt);
 327
 328         kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
 329         size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
 330
 331         /*
 332          * Encrypt the c_segment.
 333          */
 334         aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
 335                         &encrypt_iv.aes_iv[0],
 336                         (unsigned int)(size / AES_BLOCK_SIZE),
 337                         (unsigned char *) kernel_vaddr,
 338                         &swap_crypt_ctx.encrypt);
 339
 340         vm_page_encrypt_counter += (size/PAGE_SIZE_64);
 341 }
 342
 343 void
 344 vm_swap_decrypt(c_segment_t c_seg)
 345 {
 346
 347         vm_offset_t     kernel_vaddr = 0;
 348         uint64_t        size = 0;
 349
 350         union {
 351                 unsigned char   aes_iv[AES_BLOCK_SIZE];
 352                 void            *c_seg;
 353         } decrypt_iv;
 354
 355
 356         assert(swap_crypt_ctx_initialized);
 357
 358         /*
 359          * Prepare an "initial vector" for the decryption.
 360          * It has to be the same as the "initial vector" we
 361          * used to encrypt that page.
 362          */
 363         bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));
 364
 365         decrypt_iv.c_seg = (void*)c_seg;
 366
 367         /* encrypt the "initial vector" */
 368         aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
 369                         swap_crypt_null_iv,
 370                         1,
 371                         &decrypt_iv.aes_iv[0],
 372                         &swap_crypt_ctx.encrypt);
 373
 374         kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
 375         size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
 376
 377         /*
 378          * Decrypt the c_segment.
 379          */
 380         aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
 381                         &decrypt_iv.aes_iv[0],
 382                         (unsigned int) (size / AES_BLOCK_SIZE),
 383                         (unsigned char *) kernel_vaddr,
 384                         &swap_crypt_ctx.decrypt);
 385
 386         vm_page_decrypt_counter += (size/PAGE_SIZE_64);
 387 }
 388 #endif /* ENCRYPTED_SWAP */
 389
 390
 391 void
 392 vm_swap_consider_defragmenting()
 393 {
 394         if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
 395             (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
 396
 397                 if (!vm_swapfile_gc_thread_running) {
 398                         lck_mtx_lock(&vm_swap_data_lock);
 399
 400                         if (!vm_swapfile_gc_thread_running)
 401                                 thread_wakeup((event_t) &vm_swapfile_gc_needed);
 402
 403                         lck_mtx_unlock(&vm_swap_data_lock);
 404                 }
 405         }
 406 }
 407
 408
 409 int vm_swap_defragment_yielded = 0;
 410 int vm_swap_defragment_swapin = 0;
 411 int vm_swap_defragment_free = 0;
 412 int vm_swap_defragment_busy = 0;
 413
 414
 415 static void
 416 vm_swap_defragment()
 417 {
 418         c_segment_t     c_seg;
 419
 420         /*
 421          * have to grab the master lock w/o holding
 422          * any locks in spin mode
 423          */
 424         PAGE_REPLACEMENT_DISALLOWED(TRUE);
 425
 426         lck_mtx_lock_spin_always(c_list_lock);
 427
 428         while (!queue_empty(&c_swappedout_sparse_list_head)) {
 429
 430                 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
 431                         vm_swap_defragment_yielded++;
 432                         break;
 433                 }
 434                 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
 435
 436                 lck_mtx_lock_spin_always(&c_seg->c_lock);
 437
 438                 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
 439
 440                 if (c_seg->c_busy) {
 441                         lck_mtx_unlock_always(c_list_lock);
 442
 443                         PAGE_REPLACEMENT_DISALLOWED(FALSE);
 444                         /*
 445                          * c_seg_wait_on_busy consumes c_seg->c_lock
 446                          */
 447                         c_seg_wait_on_busy(c_seg);
 448
 449                         PAGE_REPLACEMENT_DISALLOWED(TRUE);
 450
 451                         lck_mtx_lock_spin_always(c_list_lock);
 452
 453                         vm_swap_defragment_busy++;
 454                         continue;
 455                 }
 456                 if (c_seg->c_bytes_used == 0) {
 457                         /*
 458                          * c_seg_free_locked consumes the c_list_lock
 459                          * and c_seg->c_lock
 460                          */
 461                         C_SEG_BUSY(c_seg);
 462                         c_seg_free_locked(c_seg);
 463
 464                         vm_swap_defragment_free++;
 465                 } else {
 466                         lck_mtx_unlock_always(c_list_lock);
 467
 468                         c_seg_swapin(c_seg, TRUE);
 469                         lck_mtx_unlock_always(&c_seg->c_lock);
 470
 471                         vm_swap_defragment_swapin++;
 472                 }
 473                 PAGE_REPLACEMENT_DISALLOWED(FALSE);
 474
 475                 vm_pageout_io_throttle();
 476
 477                 /*
 478                  * because write waiters have privilege over readers,
 479                  * dropping and immediately retaking the master lock will
 480                  * still allow any thread waiting to acquire the
 481                  * master lock exclusively an opportunity to take it
 482                  */
 483                 PAGE_REPLACEMENT_DISALLOWED(TRUE);
 484
 485                 lck_mtx_lock_spin_always(c_list_lock);
 486         }
 487         lck_mtx_unlock_always(c_list_lock);
 488
 489         PAGE_REPLACEMENT_DISALLOWED(FALSE);
 490 }
 491
 492
 493
 494 static void
 495 vm_swapfile_create_thread(void)
 496 {
 497         clock_sec_t     sec;
 498         clock_nsec_t    nsec;
 499
 500         current_thread()->options |= TH_OPT_VMPRIV;
 501
 502         vm_swapfile_create_thread_awakened++;
 503         vm_swapfile_create_thread_running = 1;
 504
 505         while (TRUE) {
 506                 /*
 507                  * walk through the list of swap files
 508                  * and do the delayed frees/trims for
 509                  * any swap file whose count of delayed
 510                  * frees is above the batch limit
 511                  */
 512                 vm_swap_handle_delayed_trims(FALSE);
 513
 514                 lck_mtx_lock(&vm_swap_data_lock);
 515
 516                 clock_get_system_nanotime(&sec, &nsec);
 517
 518                 if (VM_SWAP_SHOULD_CREATE(sec) == 0)
 519                         break;
 520
 521                 lck_mtx_unlock(&vm_swap_data_lock);
 522
 523                 if (vm_swap_create_file() == FALSE) {
 524                         vm_swapfile_last_failed_to_create_ts = sec;
 525                         HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
 526
 527                 } else
 528                         vm_swapfile_last_successful_create_ts = sec;
 529         }
 530         vm_swapfile_create_thread_running = 0;
 531
 532         assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
 533
 534         lck_mtx_unlock(&vm_swap_data_lock);
 535
 536         thread_block((thread_continue_t)vm_swapfile_create_thread);
 537
 538         /* NOTREACHED */
 539 }
 540
 541
 542 static void
 543 vm_swapfile_gc_thread(void)
 544 {
 545         boolean_t       need_defragment;
 546         boolean_t       need_reclaim;
 547
 548         vm_swapfile_gc_thread_awakened++;
 549         vm_swapfile_gc_thread_running = 1;
 550
 551         while (TRUE) {
 552
 553                 lck_mtx_lock(&vm_swap_data_lock);
 554
 555                 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
 556                         break;
 557
 558                 need_defragment = FALSE;
 559                 need_reclaim = FALSE;
 560
 561                 if (VM_SWAP_SHOULD_DEFRAGMENT())
 562                         need_defragment = TRUE;
 563
 564                 if (VM_SWAP_SHOULD_RECLAIM()) {
 565                         need_defragment = TRUE;
 566                         need_reclaim = TRUE;
 567                 }
 568                 if (need_defragment == FALSE && need_reclaim == FALSE)
 569                         break;
 570
 571                 lck_mtx_unlock(&vm_swap_data_lock);
 572
 573                 if (need_defragment == TRUE)
 574                         vm_swap_defragment();
 575                 if (need_reclaim == TRUE)
 576                         vm_swap_reclaim();
 577         }
 578         vm_swapfile_gc_thread_running = 0;
 579
 580         assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
 581
 582         lck_mtx_unlock(&vm_swap_data_lock);
 583
 584         thread_block((thread_continue_t)vm_swapfile_gc_thread);
 585
 586         /* NOTREACHED */
 587 }
 588
 589
 590
 591 int       swapper_entered_T0 = 0;
 592 int       swapper_entered_T1 = 0;
 593 int       swapper_entered_T2 = 0;
 594
 595 static void
 596 vm_swapout_thread_throttle_adjust(void)
 597 {
 598         int swapper_throttle_new;
 599
 600         if (swapper_throttle_inited == FALSE) {
 601                 /*
 602                  * force this thread to be set to the correct
 603                  * throttling tier
 604                  */
 605                 swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
 606                 swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
 607                 swapper_throttle_inited = TRUE;
 608                 swapper_entered_T2++;
 609                 goto done;
 610         }
 611         swapper_throttle_new = swapper_throttle;
 612
 613
 614         switch(swapper_throttle) {
 615
 616         case THROTTLE_LEVEL_COMPRESSOR_TIER2:
 617
 618                 if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
 619                         swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
 620                         swapper_entered_T1++;
 621                         break;
 622                 }
 623                 break;
 624
 625         case THROTTLE_LEVEL_COMPRESSOR_TIER1:
 626
 627                 if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
 628                         swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
 629                         swapper_entered_T0++;
 630                         break;
 631                 }
 632                 if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
 633                         swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
 634                         swapper_entered_T2++;
 635                         break;
 636                 }
 637                 break;
 638
 639         case THROTTLE_LEVEL_COMPRESSOR_TIER0:
 640
 641                 if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
 642                         swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
 643                         swapper_entered_T2++;
 644                         break;
 645                 }
 646                 if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
 647                         swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
 648                         swapper_entered_T1++;
 649                         break;
 650                 }
 651                 break;
 652         }
 653 done:
 654         if (swapper_throttle != swapper_throttle_new) {
 655                 proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
 656                                             TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
 657                 proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
 658                                             TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
 659
 660                 swapper_throttle = swapper_throttle_new;
 661         }
 662 }
 663
 664
 665 int vm_swapout_found_empty = 0;
 666
 667
 668 static void
 669 vm_swapout_thread(void)
 670 {
 671         uint64_t        f_offset = 0;
 672         uint32_t        size = 0;
 673         c_segment_t     c_seg = NULL;
 674         kern_return_t   kr = KERN_SUCCESS;
 675         vm_offset_t     addr = 0;
 676
 677         current_thread()->options |= TH_OPT_VMPRIV;
 678
 679         vm_swapout_thread_awakened++;
 680
 681         lck_mtx_lock_spin_always(c_list_lock);
 682
 683         while (!queue_empty(&c_swapout_list_head)) {
 684
 685                 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
 686
 687                 lck_mtx_lock_spin_always(&c_seg->c_lock);
 688
 689                 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
 690
 691                 if (c_seg->c_busy) {
 692                         lck_mtx_unlock_always(c_list_lock);
 693
 694                         c_seg_wait_on_busy(c_seg);
 695
 696                         lck_mtx_lock_spin_always(c_list_lock);
 697
 698                         continue;
 699                 }
 700                 vm_swapout_thread_processed_segments++;
 701
 702                 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
 703
 704                 if (size == 0) {
 705                         assert(c_seg->c_on_minorcompact_q);
 706                         assert(c_seg->c_bytes_used == 0);
 707
 708                         c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
 709                         lck_mtx_unlock_always(&c_seg->c_lock);
 710                         lck_mtx_unlock_always(c_list_lock);
 711
 712                         vm_swapout_found_empty++;
 713                         goto c_seg_is_empty;
 714                 }
 715                 C_SEG_BUSY(c_seg);
 716                 c_seg->c_busy_swapping = 1;
 717
 718                 lck_mtx_unlock_always(c_list_lock);
 719
 720                 addr = (vm_offset_t) c_seg->c_store.c_buffer;
 721
 722                 lck_mtx_unlock_always(&c_seg->c_lock);
 723
 724 #if CHECKSUM_THE_SWAP
 725                 c_seg->cseg_hash = hash_string((char*)addr, (int)size);
 726                 c_seg->cseg_swap_size = size;
 727 #endif /* CHECKSUM_THE_SWAP */
 728
 729 #if ENCRYPTED_SWAP
 730                 vm_swap_encrypt(c_seg);
 731 #endif /* ENCRYPTED_SWAP */
 732
 733                 vm_swapout_thread_throttle_adjust();
 734
 735                 kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);
 736
 737                 PAGE_REPLACEMENT_DISALLOWED(TRUE);
 738
 739                 if (kr == KERN_SUCCESS) {
 740                         kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
 741                 }
 742                 lck_mtx_lock_spin_always(c_list_lock);
 743                 lck_mtx_lock_spin_always(&c_seg->c_lock);
 744
 745                 if (kr == KERN_SUCCESS) {
 746                         int             new_state = C_ON_SWAPPEDOUT_Q;
 747                         boolean_t       insert_head = FALSE;
 748
 749                         if (hibernate_flushing == TRUE) {
 750                                 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
 751                                     c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
 752                                         insert_head = TRUE;
 753                         } else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
 754                                 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
 755
 756                         c_seg_switch_state(c_seg, new_state, insert_head);
 757
 758                         c_seg->c_store.c_swap_handle = f_offset;
 759
 760                         VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
 761
 762                         if (c_seg->c_bytes_used)
 763                                 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
 764                 } else {
 765 #if ENCRYPTED_SWAP
 766                         vm_swap_decrypt(c_seg);
 767 #endif /* ENCRYPTED_SWAP */
 768                         if (c_seg->c_overage_swap == TRUE) {
 769                                 c_seg->c_overage_swap = FALSE;
 770                                 c_overage_swapped_count--;
 771                         }
 772                         c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
 773                 }
 774                 lck_mtx_unlock_always(c_list_lock);
 775
 776                 c_seg->c_busy_swapping = 0;
 777                 C_SEG_WAKEUP_DONE(c_seg);
 778                 lck_mtx_unlock_always(&c_seg->c_lock);
 779
 780                 PAGE_REPLACEMENT_DISALLOWED(FALSE);
 781
 782                 vm_pageout_io_throttle();
 783 c_seg_is_empty:
 784                 if (c_swapout_count == 0)
 785                         vm_swap_consider_defragmenting();
 786
 787                 lck_mtx_lock_spin_always(c_list_lock);
 788         }
 789
 790         assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
 791
 792         lck_mtx_unlock_always(c_list_lock);
 793
 794         thread_block((thread_continue_t)vm_swapout_thread);
 795
 796         /* NOTREACHED */
 797 }
 798
 799 boolean_t
 800 vm_swap_create_file()
 801 {
 802         uint64_t        size = 0;
 803         int             namelen = 0;
 804         boolean_t       swap_file_created = FALSE;
 805         boolean_t       swap_file_reuse = FALSE;
 806         boolean_t       swap_file_pin = FALSE;
 807         struct swapfile *swf = NULL;
 808
 809         /*
 810          * Any swapfile structure ready for re-use?
 811          */
 812
 813         lck_mtx_lock(&vm_swap_data_lock);
 814
 815         swf = (struct swapfile*) queue_first(&swf_global_queue);
 816
 817         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
 818                 if (swf->swp_flags == SWAP_REUSE) {
 819                         swap_file_reuse = TRUE;
 820                         break;
 821                 }
 822                 swf = (struct swapfile*) queue_next(&swf->swp_queue);
 823         }
 824
 825         lck_mtx_unlock(&vm_swap_data_lock);
 826
 827         if (swap_file_reuse == FALSE) {
 828
 829                 if (strlen(swapfilename) == 0) {
 830                         /*
 831                          * If no swapfile name has been set, we'll
 832                          * use the default name.
 833                          *
 834                          * Also, this function is only called from the swapfile management thread.
 835                          * So we don't need to worry about a race in checking/setting the name here.
 836                          */
 837
 838                         strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
 839                 }
 840
 841                 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
 842
 843                 swf = (struct swapfile*) kalloc(sizeof *swf);
 844                 memset(swf, 0, sizeof(*swf));
 845
 846                 swf->swp_index = vm_num_swap_files + 1;
 847                 swf->swp_pathlen = namelen;
 848                 swf->swp_path = (char*)kalloc(swf->swp_pathlen);
 849
 850                 memset(swf->swp_path, 0, namelen);
 851
 852                 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
 853         }
 854
 855         vm_swapfile_open(swf->swp_path, &swf->swp_vp);
 856
 857         if (swf->swp_vp == NULL) {
 858                 if (swap_file_reuse == FALSE) {
 859                         kfree(swf->swp_path, swf->swp_pathlen);
 860                         kfree(swf, sizeof *swf);
 861                 }
 862                 return FALSE;
 863         }
 864         vm_swapfile_can_be_created = TRUE;
 865
 866         size = MAX_SWAP_FILE_SIZE;
 867
 868         while (size >= MIN_SWAP_FILE_SIZE) {
 869
 870                 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
 871
 872                 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
 873
 874                         int num_bytes_for_bitmap = 0;
 875
 876                         swap_file_created = TRUE;
 877
 878                         swf->swp_size = size;
 879                         swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
 880                         swf->swp_nseginuse = 0;
 881                         swf->swp_free_hint = 0;
 882
 883                         num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3) , 1);
 884                         /*
 885                          * Allocate a bitmap that describes the
 886                          * number of segments held by this swapfile.
 887                          */
 888                         swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
 889                         memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);
 890
 891                         swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
 892                         memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));
 893
 894                         /*
 895                          * passing a NULL trim_list into vnode_trim_list
 896                          * will return ENOTSUP if trim isn't supported
 897                          * and 0 if it is
 898                          */
 899                         if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
 900                                 swp_trim_supported = TRUE;
 901
 902                         lck_mtx_lock(&vm_swap_data_lock);
 903
 904                         swf->swp_flags = SWAP_READY;
 905
 906                         if (swap_file_reuse == FALSE) {
 907                                 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
 908                         }
 909
 910                         vm_num_swap_files++;
 911
 912                         vm_swapfile_total_segs_alloced += swf->swp_nsegs;
 913
 914                         if (swap_file_pin == TRUE) {
 915                                 swf->swp_flags |= SWAP_PINNED;
 916                                 vm_swappin_avail -= swf->swp_size;
 917                         }
 918
 919                         lck_mtx_unlock(&vm_swap_data_lock);
 920
 921                         thread_wakeup((event_t) &vm_num_swap_files);
 922                         break;
 923                 } else {
 924
 925                         size = size / 2;
 926                 }
 927         }
 928         if (swap_file_created == FALSE) {
 929
 930                 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
 931
 932                 swf->swp_vp = NULL;
 933
 934                 if (swap_file_reuse == FALSE) {
 935                         kfree(swf->swp_path, swf->swp_pathlen);
 936                         kfree(swf, sizeof *swf);
 937                 }
 938         }
 939         return swap_file_created;
 940 }
 941
 942
 943 kern_return_t
 944 vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size)
 945 {
 946         struct swapfile *swf = NULL;
 947         uint64_t        file_offset = 0;
 948         int             retval = 0;
 949
 950         if (addr == 0) {
 951                 return KERN_FAILURE;
 952         }
 953
 954         lck_mtx_lock(&vm_swap_data_lock);
 955
 956         swf = vm_swapfile_for_handle(f_offset);
 957
 958         if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
 959                 retval = 1;
 960                 goto done;
 961         }
 962         swf->swp_io_count++;
 963
 964         lck_mtx_unlock(&vm_swap_data_lock);
 965
 966         file_offset = (f_offset & SWAP_SLOT_MASK);
 967         retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ);
 968
 969         if (retval == 0)
 970                 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
 971         else
 972                 vm_swap_get_failures++;
 973
 974         /*
 975          * Free this slot in the swap structure.
 976          */
 977         vm_swap_free(f_offset);
 978
 979         lck_mtx_lock(&vm_swap_data_lock);
 980         swf->swp_io_count--;
 981
 982         if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
 983
 984                 swf->swp_flags &= ~SWAP_WANTED;
 985                 thread_wakeup((event_t) &swf->swp_flags);
 986         }
 987 done:
 988         lck_mtx_unlock(&vm_swap_data_lock);
 989
 990         if (retval == 0)
 991                 return KERN_SUCCESS;
 992         else
 993                 return KERN_FAILURE;
 994 }
 995
 996 kern_return_t
 997 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
 998 {
 999         unsigned int    segidx = 0;
1000         struct swapfile *swf = NULL;
1001         uint64_t        file_offset = 0;
1002         uint64_t        swapfile_index = 0;
1003         unsigned int    byte_for_segidx = 0;
1004         unsigned int    offset_within_byte = 0;
1005         boolean_t       swf_eligible = FALSE;
1006         boolean_t       waiting = FALSE;
1007         boolean_t       retried = FALSE;
1008         int             error = 0;
1009         clock_sec_t     sec;
1010         clock_nsec_t    nsec;
1011
1012         if (addr == 0 || f_offset == NULL) {
1013                 return KERN_FAILURE;
1014         }
1015 retry:
1016         lck_mtx_lock(&vm_swap_data_lock);
1017
1018         swf = (struct swapfile*) queue_first(&swf_global_queue);
1019
1020         while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1021
1022                 segidx = swf->swp_free_hint;
1023
1024                 swf_eligible =  (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1025
1026                 if (swf_eligible) {
1027
1028                         while(segidx < swf->swp_nsegs) {
1029
1030                                 byte_for_segidx = segidx >> 3;
1031                                 offset_within_byte = segidx % 8;
1032
1033                                 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1034                                         segidx++;
1035                                         continue;
1036                                 }
1037
1038                                 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1039
1040                                 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1041                                 swf->swp_nseginuse++;
1042                                 swf->swp_io_count++;
1043                                 swapfile_index = swf->swp_index;
1044
1045                                 vm_swapfile_total_segs_used++;
1046
1047                                 clock_get_system_nanotime(&sec, &nsec);
1048
1049                                 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1050                                         thread_wakeup((event_t) &vm_swapfile_create_needed);
1051
1052                                 lck_mtx_unlock(&vm_swap_data_lock);
1053
1054                                 goto done;
1055                         }
1056                 }
1057                 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1058         }
1059         assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1060
1061         /*
1062          * we've run out of swap segments, but may not
1063          * be in a position to immediately create a new swap
1064          * file if we've recently failed to create due to a lack
1065          * of free space in the root filesystem... we'll try
1066          * to kick that create off, but in any event we're going
1067          * to take a breather (up to 1 second) so that we're not caught in a tight
1068          * loop back in "vm_compressor_compact_and_swap" trying to stuff
1069          * segments into swap files only to have them immediately put back
1070          * on the c_age queue due to vm_swap_put failing.
1071          *
1072          * if we're doing these puts due to a hibernation flush,
1073          * no need to block... setting hibernate_no_swapspace to TRUE,
1074          * will cause "vm_compressor_compact_and_swap" to immediately abort
1075          */
1076         clock_get_system_nanotime(&sec, &nsec);
1077
1078         if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1079                 thread_wakeup((event_t) &vm_swapfile_create_needed);
1080
1081         if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1082                 waiting = TRUE;
1083                 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
1084         } else
1085                 hibernate_no_swapspace = TRUE;
1086
1087         lck_mtx_unlock(&vm_swap_data_lock);
1088
1089         if (waiting == TRUE) {
1090                 thread_block(THREAD_CONTINUE_NULL);
1091
1092                 if (retried == FALSE && hibernate_flushing == TRUE) {
1093                         retried = TRUE;
1094                         goto retry;
1095                 }
1096         }
1097         vm_swap_put_failures++;
1098
1099         return KERN_FAILURE;
1100
1101 done:
1102         error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);
1103
1104         lck_mtx_lock(&vm_swap_data_lock);
1105
1106         swf->swp_csegs[segidx] = c_seg;
1107
1108         swf->swp_io_count--;
1109
1110         *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1111
1112         if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1113
1114                 swf->swp_flags &= ~SWAP_WANTED;
1115                 thread_wakeup((event_t) &swf->swp_flags);
1116         }
1117
1118         lck_mtx_unlock(&vm_swap_data_lock);
1119
1120         if (error) {
1121                 vm_swap_free(*f_offset);
1122
1123                 vm_swap_put_failures++;
1124
1125                 return KERN_FAILURE;
1126         }
1127         return KERN_SUCCESS;
1128 }
1129
1130
1131
1132 static void
1133 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1134 {
1135         uint64_t        file_offset = 0;
1136         unsigned int    segidx = 0;
1137
1138
1139         if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1140
1141                 unsigned int byte_for_segidx = 0;
1142                 unsigned int offset_within_byte = 0;
1143
1144                 file_offset = (f_offset & SWAP_SLOT_MASK);
1145                 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1146
1147                 byte_for_segidx = segidx >> 3;
1148                 offset_within_byte = segidx % 8;
1149
1150                 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1151
1152                         (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1153
1154                         swf->swp_csegs[segidx] = NULL;
1155
1156                         swf->swp_nseginuse--;
1157                         vm_swapfile_total_segs_used--;
1158
1159                         if (segidx < swf->swp_free_hint) {
1160                                 swf->swp_free_hint = segidx;
1161                         }
1162                 }
1163                 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1164                         thread_wakeup((event_t) &vm_swapfile_gc_needed);
1165         }
1166 }
1167
1168
1169 uint32_t vm_swap_free_now_count = 0;
1170 uint32_t vm_swap_free_delayed_count = 0;
1171
1172
1173 void
1174 vm_swap_free(uint64_t f_offset)
1175 {
1176         struct swapfile *swf = NULL;
1177         struct trim_list *tl = NULL;
1178         clock_sec_t     sec;
1179         clock_nsec_t    nsec;
1180
1181         if (swp_trim_supported == TRUE)
1182                 tl = kalloc(sizeof(struct trim_list));
1183
1184         lck_mtx_lock(&vm_swap_data_lock);
1185
1186         swf = vm_swapfile_for_handle(f_offset);
1187
1188         if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1189
1190                 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1191                         /*
1192                          * don't delay the free if the underlying disk doesn't support
1193                          * trim, or we're in the midst of reclaiming this swap file since
1194                          * we don't want to move segments that are technically free
1195                          * but not yet handled by the delayed free mechanism
1196                          */
1197                         vm_swap_free_now(swf, f_offset);
1198
1199                         vm_swap_free_now_count++;
1200                         goto done;
1201                 }
1202                 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1203                 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1204
1205                 tl->tl_next = swf->swp_delayed_trim_list_head;
1206                 swf->swp_delayed_trim_list_head = tl;
1207                 swf->swp_delayed_trim_count++;
1208                 tl = NULL;
1209
1210                 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1211                         clock_get_system_nanotime(&sec, &nsec);
1212
1213                         if (sec > dont_trim_until_ts)
1214                                 thread_wakeup((event_t) &vm_swapfile_create_needed);
1215                 }
1216                 vm_swap_free_delayed_count++;
1217         }
1218 done:
1219         lck_mtx_unlock(&vm_swap_data_lock);
1220
1221         if (tl != NULL)
1222                 kfree(tl, sizeof(struct trim_list));
1223 }
1224
1225
1226 static void
1227 vm_swap_wait_on_trim_handling_in_progress()
1228 {
1229         while (delayed_trim_handling_in_progress == TRUE) {
1230
1231                 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1232                 lck_mtx_unlock(&vm_swap_data_lock);
1233
1234                 thread_block(THREAD_CONTINUE_NULL);
1235
1236                 lck_mtx_lock(&vm_swap_data_lock);
1237         }
1238 }
1239
1240
1241 static void
1242 vm_swap_handle_delayed_trims(boolean_t force_now)
1243 {
1244         struct swapfile *swf = NULL;
1245
1246         /*
1247          * serialize the race between us and vm_swap_reclaim...
1248          * if vm_swap_reclaim wins it will turn off SWAP_READY
1249          * on the victim it has chosen... we can just skip over
1250          * that file since vm_swap_reclaim will first process
1251          * all of the delayed trims associated with it
1252          */
1253         lck_mtx_lock(&vm_swap_data_lock);
1254
1255         delayed_trim_handling_in_progress = TRUE;
1256
1257         lck_mtx_unlock(&vm_swap_data_lock);
1258
1259         /*
1260          * no need to hold the lock to walk the swf list since
1261          * vm_swap_create (the only place where we add to this list)
1262          * is run on the same thread as this function
1263          * and vm_swap_reclaim doesn't remove items from this list
1264          * instead marking them with SWAP_REUSE for future re-use
1265          */
1266         swf = (struct swapfile*) queue_first(&swf_global_queue);
1267
1268         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1269
1270                 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1271
1272                         assert(!(swf->swp_flags & SWAP_RECLAIM));
1273                         vm_swap_do_delayed_trim(swf);
1274                 }
1275                 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1276         }
1277         lck_mtx_lock(&vm_swap_data_lock);
1278
1279         delayed_trim_handling_in_progress = FALSE;
1280         thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1281
1282         if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1283                 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1284
1285         lck_mtx_unlock(&vm_swap_data_lock);
1286
1287 }
1288
1289 static void
1290 vm_swap_do_delayed_trim(struct swapfile *swf)
1291 {
1292         struct trim_list *tl, *tl_head;
1293
1294         lck_mtx_lock(&vm_swap_data_lock);
1295
1296         tl_head = swf->swp_delayed_trim_list_head;
1297         swf->swp_delayed_trim_list_head = NULL;
1298         swf->swp_delayed_trim_count = 0;
1299
1300         lck_mtx_unlock(&vm_swap_data_lock);
1301
1302         vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1303
1304         while ((tl = tl_head) != NULL) {
1305                 unsigned int    segidx = 0;
1306                 unsigned int    byte_for_segidx = 0;
1307                 unsigned int    offset_within_byte = 0;
1308
1309                 lck_mtx_lock(&vm_swap_data_lock);
1310
1311                 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1312
1313                 byte_for_segidx = segidx >> 3;
1314                 offset_within_byte = segidx % 8;
1315
1316                 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1317
1318                         (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1319
1320                         swf->swp_csegs[segidx] = NULL;
1321
1322                         swf->swp_nseginuse--;
1323                         vm_swapfile_total_segs_used--;
1324
1325                         if (segidx < swf->swp_free_hint) {
1326                                 swf->swp_free_hint = segidx;
1327                         }
1328                 }
1329                 lck_mtx_unlock(&vm_swap_data_lock);
1330
1331                 tl_head = tl->tl_next;
1332
1333                 kfree(tl, sizeof(struct trim_list));
1334         }
1335 }
1336
1337
/*
 * vm_swap_flush:
 *
 * Intentionally a no-op; there is nothing to flush here.
 */
void
vm_swap_flush()
{
}
1343
1344 int     vm_swap_reclaim_yielded = 0;
1345
1346 void
1347 vm_swap_reclaim(void)
1348 {
1349         vm_offset_t     addr = 0;
1350         unsigned int    segidx = 0;
1351         uint64_t        f_offset = 0;
1352         struct swapfile *swf = NULL;
1353         struct swapfile *smallest_swf = NULL;
1354         unsigned int    min_nsegs = 0;
1355         unsigned int    byte_for_segidx = 0;
1356         unsigned int    offset_within_byte = 0;
1357         uint32_t        c_size = 0;
1358
1359         c_segment_t     c_seg = NULL;
1360
1361         if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
1362                 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1363         }
1364
1365         lck_mtx_lock(&vm_swap_data_lock);
1366
1367         /*
1368          * if we're running the swapfile list looking for
1369          * candidates with delayed trims, we need to
1370          * wait before making our decision concerning
1371          * the swapfile we want to reclaim
1372          */
1373         vm_swap_wait_on_trim_handling_in_progress();
1374
1375         /*
1376          * from here until we knock down the SWAP_READY bit,
1377          * we need to remain behind the vm_swap_data_lock...
1378          * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1379          * will not consider this swapfile for processing
1380          */
1381         swf = (struct swapfile*) queue_first(&swf_global_queue);
1382         min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1383         smallest_swf = NULL;
1384
1385         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1386
1387                 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1388
1389                         smallest_swf = swf;
1390                         min_nsegs = swf->swp_nseginuse;
1391                 }
1392                 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1393         }
1394
1395         if (smallest_swf == NULL)
1396                 goto done;
1397
1398         swf = smallest_swf;
1399
1400
1401         swf->swp_flags &= ~SWAP_READY;
1402         swf->swp_flags |= SWAP_RECLAIM;
1403
1404         if (swf->swp_delayed_trim_count) {
1405
1406                 lck_mtx_unlock(&vm_swap_data_lock);
1407
1408                 vm_swap_do_delayed_trim(swf);
1409
1410                 lck_mtx_lock(&vm_swap_data_lock);
1411         }
1412         segidx = 0;
1413
1414         while (segidx < swf->swp_nsegs) {
1415
1416 ReTry_for_cseg:
1417                 /*
1418                  * Wait for outgoing I/Os.
1419                  */
1420                 while (swf->swp_io_count) {
1421
1422                         swf->swp_flags |= SWAP_WANTED;
1423
1424                         assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1425                         lck_mtx_unlock(&vm_swap_data_lock);
1426
1427                         thread_block(THREAD_CONTINUE_NULL);
1428
1429                         lck_mtx_lock(&vm_swap_data_lock);
1430                 }
1431                 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1432                         vm_swap_reclaim_yielded++;
1433                         break;
1434                 }
1435
1436                 byte_for_segidx = segidx >> 3;
1437                 offset_within_byte = segidx % 8;
1438
1439                 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
1440
1441                         segidx++;
1442                         continue;
1443                 }
1444
1445                 c_seg = swf->swp_csegs[segidx];
1446                 assert(c_seg);
1447
1448                 lck_mtx_lock_spin_always(&c_seg->c_lock);
1449
1450                 if (c_seg->c_busy) {
1451                         /*
1452                          * a swapped out c_segment in the process of being freed will remain in the
1453                          * busy state until after the vm_swap_free is called on it... vm_swap_free
1454                          * takes the vm_swap_data_lock, so can't change the swap state until after
1455                          * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
1456                          * which will allow c_seg_free_locked to clear busy and wake up this thread...
1457                          * at that point, we re-look up the swap state which will now indicate that
1458                          * this c_segment no longer exists.
1459                          */
1460                         c_seg->c_wanted = 1;
1461
1462                         assert_wait((event_t) (c_seg), THREAD_UNINT);
1463                         lck_mtx_unlock_always(&c_seg->c_lock);
1464
1465                         lck_mtx_unlock(&vm_swap_data_lock);
1466
1467                         thread_block(THREAD_CONTINUE_NULL);
1468
1469                         lck_mtx_lock(&vm_swap_data_lock);
1470
1471                         goto ReTry_for_cseg;
1472                 }
1473                 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1474
1475                 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1476
1477                 assert(c_seg == swf->swp_csegs[segidx]);
1478                 swf->swp_csegs[segidx] = NULL;
1479                 swf->swp_nseginuse--;
1480
1481                 vm_swapfile_total_segs_used--;
1482
1483                 lck_mtx_unlock(&vm_swap_data_lock);
1484
1485                 assert(C_SEG_IS_ONDISK(c_seg));
1486
1487                 C_SEG_BUSY(c_seg);
1488                 c_seg->c_busy_swapping = 1;
1489 #if !CHECKSUM_THE_SWAP
1490                 c_seg_trim_tail(c_seg);
1491 #endif
1492                 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1493
1494                 assert(c_size <= C_SEG_BUFSIZE && c_size);
1495
1496                 lck_mtx_unlock_always(&c_seg->c_lock);
1497
1498                 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {
1499
1500                         /*
1501                          * reading the data back in failed, so convert c_seg
1502                          * to a swapped in c_segment that contains no data
1503                          */
1504                         c_seg_swapin_requeue(c_seg, FALSE);
1505                         /*
1506                          * returns with c_busy_swapping cleared
1507                          */
1508
1509                         vm_swap_get_failures++;
1510                         goto swap_io_failed;
1511                 }
1512                 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
1513
1514                 if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
1515                         vm_offset_t     c_buffer;
1516
1517                         /*
1518                          * the put failed, so convert c_seg to a fully swapped in c_segment
1519                          * with valid data
1520                          */
1521                         c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
1522
1523                         kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1524
1525                         memcpy((char *)c_buffer, (char *)addr, c_size);
1526
1527                         c_seg->c_store.c_buffer = (int32_t *)c_buffer;
1528 #if ENCRYPTED_SWAP
1529                         vm_swap_decrypt(c_seg);
1530 #endif /* ENCRYPTED_SWAP */
1531                         c_seg_swapin_requeue(c_seg, TRUE);
1532                         /*
1533                          * returns with c_busy_swapping cleared
1534                          */
1535                         OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
1536
1537                         goto swap_io_failed;
1538                 }
1539                 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
1540
1541                 lck_mtx_lock_spin_always(&c_seg->c_lock);
1542
1543                 assert(C_SEG_IS_ONDISK(c_seg));
1544                 /*
1545                  * The c_seg will now know about the new location on disk.
1546                  */
1547                 c_seg->c_store.c_swap_handle = f_offset;
1548                 c_seg->c_busy_swapping = 0;
1549 swap_io_failed:
1550                 C_SEG_WAKEUP_DONE(c_seg);
1551
1552                 lck_mtx_unlock_always(&c_seg->c_lock);
1553                 lck_mtx_lock(&vm_swap_data_lock);
1554         }
1555
1556         if (swf->swp_nseginuse) {
1557
1558                 swf->swp_flags &= ~SWAP_RECLAIM;
1559                 swf->swp_flags |= SWAP_READY;
1560
1561                 goto done;
1562         }
1563         /*
1564          * We don't remove this inactive swf from the queue.
1565          * That way, we can re-use it when needed again and
1566          * preserve the namespace. The delayed_trim processing
1567          * is also dependent on us not removing swfs from the queue.
1568          */
1569         //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
1570
1571         vm_num_swap_files--;
1572
1573         vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
1574
1575         lck_mtx_unlock(&vm_swap_data_lock);
1576
1577         vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1578
1579         kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
1580         kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
1581
1582         lck_mtx_lock(&vm_swap_data_lock);
1583
1584         if (swf->swp_flags & SWAP_PINNED) {
1585                 vm_swappin_avail += swf->swp_size;
1586         }
1587
1588         swf->swp_vp = NULL;
1589         swf->swp_size = 0;
1590         swf->swp_free_hint = 0;
1591         swf->swp_nsegs = 0;
1592         swf->swp_flags = SWAP_REUSE;
1593
1594 done:
1595         thread_wakeup((event_t) &swf->swp_flags);
1596         lck_mtx_unlock(&vm_swap_data_lock);
1597
1598         kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
1599 }
1600
1601
1602 uint64_t
1603 vm_swap_get_total_space(void)
1604 {
1605         uint64_t total_space = 0;
1606
1607         total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
1608
1609         return total_space;
1610 }
1611
1612 uint64_t
1613 vm_swap_get_used_space(void)
1614 {
1615         uint64_t used_space = 0;
1616
1617         used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
1618
1619         return used_space;
1620 }
1621
1622 uint64_t
1623 vm_swap_get_free_space(void)
1624 {
1625         return (vm_swap_get_total_space() - vm_swap_get_used_space());
1626 }
1627
1628
1629 int
1630 vm_swap_low_on_space(void)
1631 {
1632
1633         if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
1634                 return (0);
1635
1636         if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
1637
1638                 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
1639                         return (0);
1640
1641                 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
1642                         return (1);
1643         }
1644         return (0);
1645 }