git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_compressor_backing

1 /*

3 *

4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@

5 *

6 * This file contains Original Code and/or Modifications of Original Code

7 * as defined in and that are subject to the Apple Public Source License

8 * Version 2.0 (the 'License'). You may not use this file except in

9 * compliance with the License. The rights granted to you under the License

10 * may not be used to create, or enable the creation or redistribution of,

11 * unlawful or unlicensed copies of an Apple operating system, or to

12 * circumvent, violate, or enable the circumvention or violation of, any

13 * terms of an Apple operating system software license agreement.

14 *

15 * Please obtain a copy of the License at

16 * http://www.opensource.apple.com/apsl/ and read it before using this file.

17 *

18 * The Original Code and all software distributed under the License are

19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER

20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,

21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,

22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.

23 * Please see the License for the specific language governing rights and

24 * limitations under the License.

25 *

26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@

27 */

29 #include "vm_compressor_backing_store.h"

30 #include <vm/vm_pageout.h>

31 #include <vm/vm_protos.h>

33 #include <IOKit/IOHibernatePrivate.h>

35 #include <kern/policy_internal.h>

 LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");

38 LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);

40 #if defined(XNU_TARGET_OS_OSX)

41 /*

42 * launchd explicitly turns ON swap later during boot on macOS devices.

43 */

44 boolean_t compressor_store_stop_compaction = TRUE;

45 #else

46 boolean_t compressor_store_stop_compaction = FALSE;

47 #endif

49 boolean_t vm_swapfile_create_needed = FALSE;

50 boolean_t vm_swapfile_gc_needed = FALSE;

52 int vm_swapper_throttle = -1;

53 uint64_t vm_swapout_thread_id;

 uint64_t        vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */

 uint64_t        vm_swap_get_failures = 0; /* Fatal */

 uint64_t        vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */

58 int vm_num_swap_files_config = 0;

59 int vm_num_swap_files = 0;

60 int vm_num_pinned_swap_files = 0;

61 int vm_swapout_thread_processed_segments = 0;

62 int vm_swapout_thread_awakened = 0;

63 bool vm_swapout_thread_running = FALSE;

64 int vm_swapfile_create_thread_awakened = 0;

65 int vm_swapfile_create_thread_running = 0;

66 int vm_swapfile_gc_thread_awakened = 0;

67 int vm_swapfile_gc_thread_running = 0;

69 int64_t vm_swappin_avail = 0;

70 boolean_t vm_swappin_enabled = FALSE;

71 unsigned int vm_swapfile_total_segs_alloced = 0;

72 unsigned int vm_swapfile_total_segs_used = 0;

 char            swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;

76 extern vm_map_t compressor_map;

79 #define SWAP_READY 0x1 /* Swap file is ready to be used */

80 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */

81 #define SWAP_WANTED 0x4 /* Swap file has waiters */

82 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/

83 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */

86 struct swapfile {

87 queue_head_t swp_queue; /* list of swap files */

88 char *swp_path; /* saved pathname of swap file */

89 struct vnode *swp_vp; /* backing vnode */

90 uint64_t swp_size; /* size of this swap file */

91 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */

92 unsigned int swp_pathlen; /* length of pathname */

93 unsigned int swp_nsegs; /* #segments we can use */

94 unsigned int swp_nseginuse; /* #segments in use */

95 unsigned int swp_index; /* index of this swap file */

96 unsigned int swp_flags; /* state of swap file */

97 unsigned int swp_free_hint; /* offset of 1st free chunk */

98 unsigned int swp_io_count; /* count of outstanding I/Os */

99 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */

100

101 struct trim_list *swp_delayed_trim_list_head;

102 unsigned int swp_delayed_trim_count;

103 };

104

105 queue_head_t swf_global_queue;

106 boolean_t swp_trim_supported = FALSE;

107

108 extern clock_sec_t dont_trim_until_ts;

109 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;

110 clock_sec_t vm_swapfile_last_successful_create_ts = 0;

111 int vm_swapfile_can_be_created = FALSE;

112 boolean_t delayed_trim_handling_in_progress = FALSE;

113

114 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;

115

 static void vm_swapout_thread_throttle_adjust(void);

 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);

 static void vm_swapout_thread(void);

 static void vm_swapfile_create_thread(void);

 static void vm_swapfile_gc_thread(void);

 static void vm_swap_defragment(void);

122 static void vm_swap_handle_delayed_trims(boolean_t);

 static void vm_swap_do_delayed_trim(struct swapfile *);

 static void vm_swap_wait_on_trim_handling_in_progress(void);

125

 extern int vnode_getwithref(struct vnode* vp);

127

128 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;

129

#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

/* Defragment when forced, or when more than 1/16th of the used segments are sparse. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
/* Swap files are never pinned on these targets. */
#define VM_SWAP_SHOULD_PIN(_size)       FALSE

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

/* Defragment when forced, or when more than 1/4th of the used segments are sparse. */
#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
/* Pin only while the remaining pinnable space can hold a file of _size bytes. */
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))

#endif /* !XNU_TARGET_OS_OSX */

/* Minimum number of seconds to wait after a failed swap file creation before retrying. */
#define VM_SWAPFILE_DELAYED_CREATE      15

/*
 * The two predicates below were defined identically in both arms of the
 * conditional above, so they live here once. Macro parameters are
 * parenthesized so that any expression can be passed safely.
 */

/*
 * Create another swap file when we are below the configured file limit,
 * free space has dropped under the high-water mark, and the last failed
 * attempt is more than VM_SWAPFILE_DELAYED_CREATE seconds before cur_ts.
 */
#define VM_SWAP_SHOULD_CREATE(cur_ts)   (((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
	                                (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE)) ? 1 : 0)
/* Trim once the swap file has batched up enough delayed frees. */
#define VM_SWAP_SHOULD_TRIM(swf)        ((((swf)->swp_delayed_trim_count) >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

/* Reclaim when forced, or when the count of free segments reaches the reclaim threshold. */
#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
/* Stop an unforced reclaim once the count of free segments is down to the minimum. */
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)

/* The swapper counts as busy while swapouts are queued and it runs at throttle tier 0. */
#define VM_SWAP_BUSY()  ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)

166

167

#if CHECKSUM_THE_SWAP
extern unsigned int     hash_string(char *cp, int len);
#endif /* CHECKSUM_THE_SWAP */

#if RECORD_THE_COMPRESSED_DATA
/* State for the optional recording of compressed data to a file. */
boolean_t       c_compressed_record_init_done = FALSE;
int             c_compressed_record_write_error = 0;
struct vnode    *c_compressed_record_vp = NULL;
uint64_t        c_compressed_record_file_offset = 0;

void            c_compressed_record_init(void);
void            c_compressed_record_write(char *, int);
#endif /* RECORD_THE_COMPRESSED_DATA */

extern void     vm_pageout_io_throttle(void);

182

 static struct swapfile *vm_swapfile_for_handle(uint64_t);

184

185 /*

186 * Called with the vm_swap_data_lock held.

187 */

188

189 static struct swapfile *

190 vm_swapfile_for_handle(uint64_t f_offset)

191 {

192 uint64_t file_offset = 0;

193 unsigned int swapfile_index = 0;

194 struct swapfile* swf = NULL;

195

196 file_offset = (f_offset & SWAP_SLOT_MASK);

197 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

198

         swf = (struct swapfile*) queue_first(&swf_global_queue);

200

         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

                 if (swapfile_index == swf->swp_index) {

203 break;

204 }

205

                 swf = (struct swapfile*) queue_next(&swf->swp_queue);

207 }

208

         if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {

210 swf = NULL;

211 }

212

213 return swf;

214 }

215

216 #if ENCRYPTED_SWAP

217

218 #include <libkern/crypto/aesxts.h>

219

 extern int cc_rand_generate(void *, size_t);     /* from libkern/cyrpto/rand.h> */

221

222 boolean_t swap_crypt_initialized;

 void            swap_crypt_initialize(void);

224

225 symmetric_xts xts_modectx;

 uint32_t        swap_crypt_key1[8];   /* big enough for a 256 bit random key */

 uint32_t        swap_crypt_key2[8];   /* big enough for a 256 bit random key */

228

229 #if DEVELOPMENT || DEBUG

230 boolean_t swap_crypt_xts_tested = FALSE;

 unsigned char   swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));

 unsigned char   swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));

 unsigned char   swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));

234 #endif /* DEVELOPMENT || DEBUG */

235

236 unsigned long vm_page_encrypt_counter;

237 unsigned long vm_page_decrypt_counter;

238

239

240 void

241 swap_crypt_initialize(void)

242 {

243 uint8_t *enckey1, *enckey2;

244 int keylen1, keylen2;

245 int error;

246

247 assert(swap_crypt_initialized == FALSE);

248

249 keylen1 = sizeof(swap_crypt_key1);

250 enckey1 = (uint8_t *)&swap_crypt_key1;

251 keylen2 = sizeof(swap_crypt_key2);

252 enckey2 = (uint8_t *)&swap_crypt_key2;

253

         error = cc_rand_generate((void *)enckey1, keylen1);

255 assert(!error);

256

         error = cc_rand_generate((void *)enckey2, keylen2);

258 assert(!error);

259

         error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);

261 assert(!error);

262

263 swap_crypt_initialized = TRUE;

264

265 #if DEVELOPMENT || DEBUG

266 uint8_t *encptr;

267 uint8_t *decptr;

268 uint8_t *refptr;

269 uint8_t *iv;

270 uint64_t ivnum[2];

271 int size = 0;

272 int i = 0;

273 int rc = 0;

274

275 assert(swap_crypt_xts_tested == FALSE);

276

277 /*

278 * Validate the encryption algorithms.

279 *

280 * First initialize the test data.

281 */

         for (i = 0; i < 4096; i++) {

                 swap_crypt_test_page_ref[i] = (char) i;

284 }

         ivnum[0] = (uint64_t)0xaa;

         ivnum[1] = 0;

287 iv = (uint8_t *)ivnum;

288

289 refptr = (uint8_t *)swap_crypt_test_page_ref;

290 encptr = (uint8_t *)swap_crypt_test_page_encrypt;

291 decptr = (uint8_t *)swap_crypt_test_page_decrypt;

292 size = 4096;

293

294 /* encrypt */

         rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);

296 assert(!rc);

297

298 /* compare result with original - should NOT match */

         for (i = 0; i < 4096; i++) {

300 if (swap_crypt_test_page_encrypt[i] !=

301 swap_crypt_test_page_ref[i]) {

302 break;

303 }

304 }

         assert(i != 4096);

306

307 /* decrypt */

         rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);

309 assert(!rc);

310

311 /* compare result with original */

         for (i = 0; i < 4096; i++) {

313 if (swap_crypt_test_page_decrypt[i] !=

314 swap_crypt_test_page_ref[i]) {

315 panic("encryption test failed");

316 }

317 }

318 /* encrypt in place */

         rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);

320 assert(!rc);

321

322 /* decrypt in place */

         rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);

324 assert(!rc);

325

         for (i = 0; i < 4096; i++) {

327 if (swap_crypt_test_page_decrypt[i] !=

328 swap_crypt_test_page_ref[i]) {

329 panic("in place encryption test failed");

330 }

331 }

332 swap_crypt_xts_tested = TRUE;

333 #endif /* DEVELOPMENT || DEBUG */

334 }

335

336

337 void

338 vm_swap_encrypt(c_segment_t c_seg)

339 {

340 uint8_t *ptr;

341 uint8_t *iv;

342 uint64_t ivnum[2];

343 int size = 0;

344 int rc = 0;

345

346 if (swap_crypt_initialized == FALSE) {

347 swap_crypt_initialize();

348 }

349

350 #if DEVELOPMENT || DEBUG

351 C_SEG_MAKE_WRITEABLE(c_seg);

352 #endif

         ptr = (uint8_t *)c_seg->c_store.c_buffer;

         size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

355

         ivnum[0] = (uint64_t)c_seg;

         ivnum[1] = 0;

358 iv = (uint8_t *)ivnum;

359

         rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);

361 assert(!rc);

362

363 vm_page_encrypt_counter += (size / PAGE_SIZE_64);

364

365 #if DEVELOPMENT || DEBUG

366 C_SEG_WRITE_PROTECT(c_seg);

367 #endif

368 }

369

370 void

371 vm_swap_decrypt(c_segment_t c_seg)

372 {

373 uint8_t *ptr;

374 uint8_t *iv;

375 uint64_t ivnum[2];

376 int size = 0;

377 int rc = 0;

378

379 assert(swap_crypt_initialized);

380

381 #if DEVELOPMENT || DEBUG

382 C_SEG_MAKE_WRITEABLE(c_seg);

383 #endif

         ptr = (uint8_t *)c_seg->c_store.c_buffer;

         size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

386

         ivnum[0] = (uint64_t)c_seg;

         ivnum[1] = 0;

389 iv = (uint8_t *)ivnum;

390

         rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);

392 assert(!rc);

393

394 vm_page_decrypt_counter += (size / PAGE_SIZE_64);

395

396 #if DEVELOPMENT || DEBUG

397 C_SEG_WRITE_PROTECT(c_seg);

398 #endif

399 }

400 #endif /* ENCRYPTED_SWAP */

401

402

403 void

404 vm_compressor_swap_init()

405 {

406 thread_t thread = NULL;

407

408 queue_init(&swf_global_queue);

409

         if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,

411 BASEPRI_VM, &thread) != KERN_SUCCESS) {

412 panic("vm_swapout_thread: create failed");

413 }

         thread_set_thread_name(thread, "VM_swapout");

415 vm_swapout_thread_id = thread->thread_id;

416

417 thread_deallocate(thread);

418

         if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,

420 BASEPRI_VM, &thread) != KERN_SUCCESS) {

421 panic("vm_swapfile_create_thread: create failed");

422 }

423

         thread_set_thread_name(thread, "VM_swapfile_create");

425 thread_deallocate(thread);

426

         if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,

428 BASEPRI_VM, &thread) != KERN_SUCCESS) {

429 panic("vm_swapfile_gc_thread: create failed");

430 }

         thread_set_thread_name(thread, "VM_swapfile_gc");

432

433 /*

434 * Swapfile garbage collection will need to allocate memory

435 * to complete its swap reclaim and in-memory compaction.

436 * So allow it to dip into the reserved VM page pool.

437 */

438 thread_lock(thread);

439 thread->options |= TH_OPT_VMPRIV;

440 thread_unlock(thread);

441

442 thread_deallocate(thread);

443

         proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,

445 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);

         proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,

447 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

448

449 #if !XNU_TARGET_OS_OSX

450 /*

451 * dummy value until the swap file gets created

452 * when we drive the first c_segment_t to the

453 * swapout queue... at that time we will

454 * know the true size we have to work with

455 */

456 c_overage_swapped_limit = 16;

457 #endif /* !XNU_TARGET_OS_OSX */

458

459 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;

460 #if DEVELOPMENT || DEBUG

         typeof(vm_num_swap_files_config) parsed_vm_max_num_swap_files = 0;

         if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files, sizeof(parsed_vm_max_num_swap_files))) {

                 if (parsed_vm_max_num_swap_files > 0) {

464 vm_num_swap_files_config = parsed_vm_max_num_swap_files;

465 } else {

                         printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files);

467 }

468 }

469 #endif

         printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config);

471

         printf("VM Swap Subsystem is ON\n");

473 }

474

475

476 #if RECORD_THE_COMPRESSED_DATA

477

478 void

479 c_compressed_record_init()

480 {

481 if (c_compressed_record_init_done == FALSE) {

                 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);

483 c_compressed_record_init_done = TRUE;

484 }

485 }

486

487 void

 c_compressed_record_write(char *buf, int size)

489 {

         if (c_compressed_record_write_error == 0) {

                 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);

492 c_compressed_record_file_offset += size;

493 }

494 }

495 #endif

496

497

498 int compaction_swapper_inited = 0;

499

500 void

501 vm_compaction_swapper_do_init(void)

502 {

503 struct vnode *vp;

504 char *pathname;

505 int namelen;

506

507 if (compaction_swapper_inited) {

508 return;

509 }

510

511 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {

512 compaction_swapper_inited = 1;

513 return;

514 }

515 lck_mtx_lock(&vm_swap_data_lock);

516

517 if (!compaction_swapper_inited) {

                 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

                 pathname = kheap_alloc(KHEAP_TEMP, namelen, Z_WAITOK | Z_ZERO);

                 snprintf(pathname, namelen, "%s%d", swapfilename, 0);

521

522 vm_swapfile_open(pathname, &vp);

523

524 if (vp) {

                         if (vnode_pager_isSSD(vp) == FALSE) {

526 /*

527 * swap files live on an HDD, so let's make sure to start swapping

528 * much earlier since we're not worried about SSD write-wear and

529 * we have so little write bandwidth to work with

530 * these values were derived expermentially by running the performance

531 * teams stock test for evaluating HDD performance against various

532 * combinations and looking and comparing overall results.

533 * Note that the > relationship between these 4 values must be maintained

534 */

                                 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {

536 vm_compressor_minorcompact_threshold_divisor = 15;

537 }

                                 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {

539 vm_compressor_majorcompact_threshold_divisor = 18;

540 }

                                 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {

542 vm_compressor_unthrottle_threshold_divisor = 24;

543 }

                                 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {

545 vm_compressor_catchup_threshold_divisor = 30;

546 }

547 }

548 #if XNU_TARGET_OS_OSX

549 vnode_setswapmount(vp);

550 vm_swappin_avail = vnode_getswappin_avail(vp);

551

552 if (vm_swappin_avail) {

553 vm_swappin_enabled = TRUE;

554 }

555 #endif /* XNU_TARGET_OS_OSX */

                         vm_swapfile_close((uint64_t)pathname, vp);

557 }

                 kheap_free(KHEAP_TEMP, pathname, namelen);

559

560 compaction_swapper_inited = 1;

561 }

562 lck_mtx_unlock(&vm_swap_data_lock);

563 }

564

565

566 void

567 vm_swap_consider_defragmenting(int flags)

568 {

569 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);

570 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);

571

         if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&

             (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {

                 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {

575 lck_mtx_lock(&vm_swap_data_lock);

576

577 if (force_defrag) {

578 vm_swap_force_defrag = TRUE;

579 }

580

581 if (force_reclaim) {

582 vm_swap_force_reclaim = TRUE;

583 }

584

585 if (!vm_swapfile_gc_thread_running) {

586 thread_wakeup((event_t) &vm_swapfile_gc_needed);

587 }

588

589 lck_mtx_unlock(&vm_swap_data_lock);

590 }

591 }

592 }

593

594

595 int vm_swap_defragment_yielded = 0;

596 int vm_swap_defragment_swapin = 0;

597 int vm_swap_defragment_free = 0;

598 int vm_swap_defragment_busy = 0;

599

600 #if CONFIG_FREEZE

601 extern uint32_t c_segment_pages_compressed_incore;

602 extern uint32_t c_segment_pages_compressed_nearing_limit;

603 extern uint32_t c_segment_count;

604 extern uint32_t c_segments_nearing_limit;

605

606 boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);

607

608 extern bool freezer_incore_cseg_acct;

609 #endif /* CONFIG_FREEZE */

610

611 static void

612 vm_swap_defragment()

613 {

614 c_segment_t c_seg;

615

616 /*

617 * have to grab the master lock w/o holding

618 * any locks in spin mode

619 */

620 PAGE_REPLACEMENT_DISALLOWED(TRUE);

621

622 lck_mtx_lock_spin_always(c_list_lock);

623

         while (!queue_empty(&c_swappedout_sparse_list_head)) {

                 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {

626 vm_swap_defragment_yielded++;

627 break;

628 }

                 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

630

631 lck_mtx_lock_spin_always(&c_seg->c_lock);

632

                 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

634

635 if (c_seg->c_busy) {

636 lck_mtx_unlock_always(c_list_lock);

637

638 PAGE_REPLACEMENT_DISALLOWED(FALSE);

639 /*

640 * c_seg_wait_on_busy consumes c_seg->c_lock

641 */

642 c_seg_wait_on_busy(c_seg);

643

644 PAGE_REPLACEMENT_DISALLOWED(TRUE);

645

646 lck_mtx_lock_spin_always(c_list_lock);

647

648 vm_swap_defragment_busy++;

649 continue;

650 }

                 if (c_seg->c_bytes_used == 0) {

652 /*

653 * c_seg_free_locked consumes the c_list_lock

654 * and c_seg->c_lock

655 */

656 C_SEG_BUSY(c_seg);

657 c_seg_free_locked(c_seg);

658

659 vm_swap_defragment_free++;

660 } else {

661 lck_mtx_unlock_always(c_list_lock);

662

663 #if CONFIG_FREEZE

664 if (freezer_incore_cseg_acct) {

                                 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {

666 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);

667 }

668

                                 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;

                                 if ((incore_seg_count + 1) >= c_segments_nearing_limit) {

671 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);

672 }

673 }

674 #endif /* CONFIG_FREEZE */

                         if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {

676 lck_mtx_unlock_always(&c_seg->c_lock);

677 }

678

679 vm_swap_defragment_swapin++;

680 }

681 PAGE_REPLACEMENT_DISALLOWED(FALSE);

682

683 vm_pageout_io_throttle();

684

685 /*

686 * because write waiters have privilege over readers,

687 * dropping and immediately retaking the master lock will

688 * still allow any thread waiting to acquire the

689 * master lock exclusively an opportunity to take it

690 */

691 PAGE_REPLACEMENT_DISALLOWED(TRUE);

692

693 lck_mtx_lock_spin_always(c_list_lock);

694 }

695 lck_mtx_unlock_always(c_list_lock);

696

697 PAGE_REPLACEMENT_DISALLOWED(FALSE);

698 }

699

700

701

702 static void

703 vm_swapfile_create_thread(void)

704 {

705 clock_sec_t sec;

706 clock_nsec_t nsec;

707

708 current_thread()->options |= TH_OPT_VMPRIV;

709

710 vm_swapfile_create_thread_awakened++;

711 vm_swapfile_create_thread_running = 1;

712

713 while (TRUE) {

714 /*

715 * walk through the list of swap files

716 * and do the delayed frees/trims for

717 * any swap file whose count of delayed

718 * frees is above the batch limit

719 */

720 vm_swap_handle_delayed_trims(FALSE);

721

722 lck_mtx_lock(&vm_swap_data_lock);

723

724 if (hibernate_in_progress_with_pinned_swap == TRUE) {

725 break;

726 }

727

728 if (compressor_store_stop_compaction == TRUE) {

729 break;

730 }

731

732 clock_get_system_nanotime(&sec, &nsec);

733

                 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {

735 break;

736 }

737

738 lck_mtx_unlock(&vm_swap_data_lock);

739

                 if (vm_swap_create_file() == FALSE) {

741 vm_swapfile_last_failed_to_create_ts = sec;

                         HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);

743 } else {

744 vm_swapfile_last_successful_create_ts = sec;

745 }

746 }

747 vm_swapfile_create_thread_running = 0;

748

749 if (hibernate_in_progress_with_pinned_swap == TRUE) {

750 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

751 }

752

753 if (compressor_store_stop_compaction == TRUE) {

754 thread_wakeup((event_t)&compressor_store_stop_compaction);

755 }

756

         assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

758

759 lck_mtx_unlock(&vm_swap_data_lock);

760

761 thread_block((thread_continue_t)vm_swapfile_create_thread);

762

763 /* NOTREACHED */

764 }

765

766

767 #if HIBERNATION

768

769 kern_return_t

770 hibernate_pin_swap(boolean_t start)

771 {

772 vm_compaction_swapper_do_init();

773

774 if (start == FALSE) {

775 lck_mtx_lock(&vm_swap_data_lock);

776 hibernate_in_progress_with_pinned_swap = FALSE;

777 lck_mtx_unlock(&vm_swap_data_lock);

778

779 return KERN_SUCCESS;

780 }

781 if (vm_swappin_enabled == FALSE) {

782 return KERN_SUCCESS;

783 }

784

785 lck_mtx_lock(&vm_swap_data_lock);

786

787 hibernate_in_progress_with_pinned_swap = TRUE;

788

789 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {

                 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);

791

792 lck_mtx_unlock(&vm_swap_data_lock);

793

794 thread_block(THREAD_CONTINUE_NULL);

795

796 lck_mtx_lock(&vm_swap_data_lock);

797 }

798 if (vm_num_swap_files > vm_num_pinned_swap_files) {

799 hibernate_in_progress_with_pinned_swap = FALSE;

800 lck_mtx_unlock(&vm_swap_data_lock);

801

                 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",

803 vm_num_swap_files, vm_num_pinned_swap_files);

804 return KERN_FAILURE;

805 }

806 lck_mtx_unlock(&vm_swap_data_lock);

807

         while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {

                 if (vm_swap_create_file() == FALSE) {

810 break;

811 }

812 }

813 return KERN_SUCCESS;

814 }

815 #endif

816

817 static void

818 vm_swapfile_gc_thread(void)

819 {

820 boolean_t need_defragment;

821 boolean_t need_reclaim;

822

823 vm_swapfile_gc_thread_awakened++;

824 vm_swapfile_gc_thread_running = 1;

825

826 while (TRUE) {

827 lck_mtx_lock(&vm_swap_data_lock);

828

829 if (hibernate_in_progress_with_pinned_swap == TRUE) {

830 break;

831 }

832

                 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {

834 break;

835 }

836

837 need_defragment = FALSE;

838 need_reclaim = FALSE;

839

840 if (VM_SWAP_SHOULD_DEFRAGMENT()) {

841 need_defragment = TRUE;

842 }

843

844 if (VM_SWAP_SHOULD_RECLAIM()) {

845 need_defragment = TRUE;

846 need_reclaim = TRUE;

847 }

                 if (need_defragment == FALSE && need_reclaim == FALSE) {

849 break;

850 }

851

852 vm_swap_force_defrag = FALSE;

853 vm_swap_force_reclaim = FALSE;

854

855 lck_mtx_unlock(&vm_swap_data_lock);

856

857 if (need_defragment == TRUE) {

858 vm_swap_defragment();

859 }

860 if (need_reclaim == TRUE) {

861 vm_swap_reclaim();

862 }

863 }

864 vm_swapfile_gc_thread_running = 0;

865

866 if (hibernate_in_progress_with_pinned_swap == TRUE) {

867 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

868 }

869

870 if (compressor_store_stop_compaction == TRUE) {

871 thread_wakeup((event_t)&compressor_store_stop_compaction);

872 }

873

         assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

875

876 lck_mtx_unlock(&vm_swap_data_lock);

877

878 thread_block((thread_continue_t)vm_swapfile_gc_thread);

879

880 /* NOTREACHED */

881 }

882

883

884

885 #define VM_SWAPOUT_LIMIT_T2P 4

886 #define VM_SWAPOUT_LIMIT_T1P 4

887 #define VM_SWAPOUT_LIMIT_T0P 6

888 #define VM_SWAPOUT_LIMIT_T0 8

889 #define VM_SWAPOUT_LIMIT_MAX 8

890

891 #define VM_SWAPOUT_START 0

892 #define VM_SWAPOUT_T2_PASSIVE 1

893 #define VM_SWAPOUT_T1_PASSIVE 2

894 #define VM_SWAPOUT_T0_PASSIVE 3

895 #define VM_SWAPOUT_T0 4

896

897 int vm_swapout_state = VM_SWAPOUT_START;

898 int vm_swapout_limit = 1;

899

900 int vm_swapper_entered_T0 = 0;

901 int vm_swapper_entered_T0P = 0;

902 int vm_swapper_entered_T1P = 0;

903 int vm_swapper_entered_T2P = 0;

904

905

906 static void

907 vm_swapout_thread_throttle_adjust(void)

908 {

909 switch (vm_swapout_state) {

910 case VM_SWAPOUT_START:

911

912 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;

913 vm_swapper_entered_T2P++;

914

915 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

916 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);

917 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

918 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

919 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;

920 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;

921

922 break;

923

924 case VM_SWAPOUT_T2_PASSIVE:

925

926 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {

927 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;

928 vm_swapper_entered_T0P++;

929

930 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

931 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);

932 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

933 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

934 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;

935 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;

936

937 break;

938 }

                 if (swapout_target_age || hibernate_flushing == TRUE) {

940 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;

941 vm_swapper_entered_T1P++;

942

943 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

944 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);

945 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

946 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

947 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;

948 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;

949 }

950 break;

951

952 case VM_SWAPOUT_T1_PASSIVE:

953

954 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {

955 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;

956 vm_swapper_entered_T0P++;

957

958 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

959 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);

960 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

961 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

962 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;

963 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;

964

965 break;

966 }

                 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {

968 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;

969 vm_swapper_entered_T2P++;

970

971 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

972 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);

973 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

974 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

975 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;

976 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;

977 }

978 break;

979

980 case VM_SWAPOUT_T0_PASSIVE:

981

982 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {

983 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;

984 vm_swapper_entered_T2P++;

985

986 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

987 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);

988 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

989 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

990 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;

991 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;

992

993 break;

994 }

995 if (SWAPPER_NEEDS_TO_CATCHUP()) {

996 vm_swapper_entered_T0++;

997

998 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

999 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);

1000 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;

1001 vm_swapout_state = VM_SWAPOUT_T0;

1002 }

1003 break;

1004

1005 case VM_SWAPOUT_T0:

1006

1007 if (SWAPPER_HAS_CAUGHTUP()) {

1008 vm_swapper_entered_T0P++;

1009

1010 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,

1011 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

1012 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;

1013 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;

1014 }

1015 break;

1016 }

1017 }

1018

/* Incremented by vm_swapout_thread() each time it pulls a segment off the swapout queue whose populated size rounds to zero. */
1019 int vm_swapout_found_empty = 0;

1020

/* Fixed pool of swapout I/O completion contexts; scanned by vm_swapout_find_free_soc() and vm_swapout_find_done_soc(). */
1021 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];

1022

/* Contexts with a write issued and not yet retired: bumped after a successful vm_swap_put(), dropped in vm_swapout_complete_soc(). */
1023 int vm_swapout_soc_busy = 0;

/* Contexts whose I/O has completed but not yet been retired: bumped in vm_swapout_iodone(), dropped in vm_swapout_complete_soc(). */
1024 int vm_swapout_soc_done = 0;

1025

1026

1027 static struct swapout_io_completion *

1028 vm_swapout_find_free_soc(void)

1029 {

1030 int i;

1031

         for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {

                 if (vm_swapout_ctx[i].swp_io_busy == 0) {

1034 return &vm_swapout_ctx[i];

1035 }

1036 }

1037 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);

1038

1039 return NULL;

1040 }

1041

1042 static struct swapout_io_completion *

1043 vm_swapout_find_done_soc(void)

1044 {

1045 int i;

1046

1047 if (vm_swapout_soc_done) {

                 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {

                         if (vm_swapout_ctx[i].swp_io_done) {

1050 return &vm_swapout_ctx[i];

1051 }

1052 }

1053 }

1054 return NULL;

1055 }

1056

1057 static void

 vm_swapout_complete_soc(struct swapout_io_completion *soc)

1059 {

1060 kern_return_t kr;

1061

1062 if (soc->swp_io_error) {

1063 kr = KERN_FAILURE;

1064 } else {

1065 kr = KERN_SUCCESS;

1066 }

1067

1068 lck_mtx_unlock_always(c_list_lock);

1069

         vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);

         vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);

1072

1073 lck_mtx_lock_spin_always(c_list_lock);

1074

1075 soc->swp_io_done = 0;

1076 soc->swp_io_busy = 0;

1077

1078 vm_swapout_soc_busy--;

1079 vm_swapout_soc_done--;

1080 }

1081

1082

1083 static void

1084 vm_swapout_thread(void)

1085 {

1086 uint32_t size = 0;

1087 c_segment_t c_seg = NULL;

1088 kern_return_t kr = KERN_SUCCESS;

1089 struct swapout_io_completion *soc;

1090

1091 current_thread()->options |= TH_OPT_VMPRIV;

1092

1093 vm_swapout_thread_awakened++;

1094

1095 lck_mtx_lock_spin_always(c_list_lock);

1096

1097 vm_swapout_thread_running = TRUE;

1098 again:

         while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit && !compressor_store_stop_compaction) {

                 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

1101

1102 lck_mtx_lock_spin_always(&c_seg->c_lock);

1103

                 assert(c_seg->c_state == C_ON_SWAPOUT_Q);

1105

1106 if (c_seg->c_busy) {

1107 lck_mtx_unlock_always(c_list_lock);

1108

1109 c_seg_wait_on_busy(c_seg);

1110

1111 lck_mtx_lock_spin_always(c_list_lock);

1112

1113 continue;

1114 }

1115 vm_swapout_thread_processed_segments++;

1116

                 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

1118

                 if (size == 0) {

                         assert(c_seg->c_bytes_used == 0);

1121

1122 if (!c_seg->c_on_minorcompact_q) {

1123 c_seg_need_delayed_compaction(c_seg, TRUE);

1124 }

1125

                         c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);

1127 lck_mtx_unlock_always(&c_seg->c_lock);

1128 lck_mtx_unlock_always(c_list_lock);

1129

1130 vm_swapout_found_empty++;

1131 goto c_seg_is_empty;

1132 }

1133 C_SEG_BUSY(c_seg);

1134 c_seg->c_busy_swapping = 1;

1135

                 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);

1137

1138 lck_mtx_unlock_always(c_list_lock);

1139 lck_mtx_unlock_always(&c_seg->c_lock);

1140

1141 #if CHECKSUM_THE_SWAP

                 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);

1143 c_seg->cseg_swap_size = size;

1144 #endif /* CHECKSUM_THE_SWAP */

1145

1146 #if ENCRYPTED_SWAP

1147 vm_swap_encrypt(c_seg);

1148 #endif /* ENCRYPTED_SWAP */

1149

1150 soc = vm_swapout_find_free_soc();

1151 assert(soc);

1152

                 soc->swp_upl_ctx.io_context = (void *)soc;

                 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;

                 soc->swp_upl_ctx.io_error = 0;

1156

                 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);

1158

1159 if (kr != KERN_SUCCESS) {

1160 if (soc->swp_io_done) {

1161 lck_mtx_lock_spin_always(c_list_lock);

1162

1163 soc->swp_io_done = 0;

1164 vm_swapout_soc_done--;

1165

1166 lck_mtx_unlock_always(c_list_lock);

1167 }

                         vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);

1169 } else {

1170 soc->swp_io_busy = 1;

1171 vm_swapout_soc_busy++;

1172 }

1173

1174 c_seg_is_empty:

                 if (c_swapout_count == 0) {

1176 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);

1177 }

1178

1179 lck_mtx_lock_spin_always(c_list_lock);

1180

                 while ((soc = vm_swapout_find_done_soc())) {

1182 vm_swapout_complete_soc(soc);

1183 }

1184 lck_mtx_unlock_always(c_list_lock);

1185

1186 vm_swapout_thread_throttle_adjust();

1187

1188 lck_mtx_lock_spin_always(c_list_lock);

1189 }

         while ((soc = vm_swapout_find_done_soc())) {

1191 vm_swapout_complete_soc(soc);

1192 }

1193 lck_mtx_unlock_always(c_list_lock);

1194

1195 vm_pageout_io_throttle();

1196

1197 lck_mtx_lock_spin_always(c_list_lock);

1198

1199 /*

1200 * Recheck if we have some c_segs to wakeup

1201 * post throttle. And, check to see if we

1202 * have any more swapouts needed.

1203 */

1204 if (vm_swapout_soc_done) {

1205 goto again;

1206 }

1207

         assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

1209

1210 vm_swapout_thread_running = FALSE;

1211

1212 lck_mtx_unlock_always(c_list_lock);

1213

1214 thread_block((thread_continue_t)vm_swapout_thread);

1215

1216 /* NOTREACHED */

1217 }

1218

1219

1220 void

 vm_swapout_iodone(void *io_context, int error)

1222 {

1223 struct swapout_io_completion *soc;

1224

1225 soc = (struct swapout_io_completion *)io_context;

1226

1227 lck_mtx_lock_spin_always(c_list_lock);

1228

1229 soc->swp_io_done = 1;

1230 soc->swp_io_error = error;

1231 vm_swapout_soc_done++;

1232

1233 if (!vm_swapout_thread_running) {

1234 thread_wakeup((event_t)&c_swapout_list_head);

1235 }

1236

1237 lck_mtx_unlock_always(c_list_lock);

1238 }

1239

1240

1241 static void

 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)

1243 {

1244 PAGE_REPLACEMENT_DISALLOWED(TRUE);

1245

1246 if (kr == KERN_SUCCESS) {

                 kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size,

1248 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);

1249 }

1250 #if ENCRYPTED_SWAP

1251 else {

1252 vm_swap_decrypt(c_seg);

1253 }

1254 #endif /* ENCRYPTED_SWAP */

1255 lck_mtx_lock_spin_always(c_list_lock);

1256 lck_mtx_lock_spin_always(&c_seg->c_lock);

1257

1258 if (kr == KERN_SUCCESS) {

1259 int new_state = C_ON_SWAPPEDOUT_Q;

1260 boolean_t insert_head = FALSE;

1261

1262 if (hibernate_flushing == TRUE) {

                         if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&

1264 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {

1265 insert_head = TRUE;

1266 }

                 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {

1268 new_state = C_ON_SWAPPEDOUTSPARSE_Q;

1269 }

1270

                 c_seg_switch_state(c_seg, new_state, insert_head);

1272

1273 c_seg->c_store.c_swap_handle = f_offset;

1274

                 counter_add(&vm_statistics_swapouts, size >> PAGE_SHIFT);

1276

1277 if (c_seg->c_bytes_used) {

                         OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);

1279 }

1280

1281 #if CONFIG_FREEZE

1282 /*

1283 * Successful swapout. Decrement the in-core compressed pages count.

1284 */

                 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);

                 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);

1287 #endif /* CONFIG_FREEZE */

1288 } else {

                 if (c_seg->c_overage_swap == TRUE) {

1290 c_seg->c_overage_swap = FALSE;

1291 c_overage_swapped_count--;

1292 }

1293

1294 #if CONFIG_FREEZE

1295 if (c_seg->c_task_owner) {

1296 c_seg_update_task_owner(c_seg, NULL);

1297 }

1298 #endif /* CONFIG_FREEZE */

1299

                 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);

1301

                 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {

1303 c_seg_need_delayed_compaction(c_seg, TRUE);

1304 }

1305 }

1306 assert(c_seg->c_busy_swapping);

1307 assert(c_seg->c_busy);

1308

1309 c_seg->c_busy_swapping = 0;

1310 lck_mtx_unlock_always(c_list_lock);

1311

1312 C_SEG_WAKEUP_DONE(c_seg);

1313 lck_mtx_unlock_always(&c_seg->c_lock);

1314

1315 PAGE_REPLACEMENT_DISALLOWED(FALSE);

1316 }

1317

1318

1319 boolean_t

1320 vm_swap_create_file()

1321 {

1322 uint64_t size = 0;

1323 int namelen = 0;

1324 boolean_t swap_file_created = FALSE;

1325 boolean_t swap_file_reuse = FALSE;

1326 boolean_t swap_file_pin = FALSE;

1327 struct swapfile *swf = NULL;

1328

1329 /*

1330 * make sure we've got all the info we need

1331 * to potentially pin a swap file... we could

1332 * be swapping out due to hibernation w/o ever

1333 * having run vm_pageout_scan, which is normally

1334 * the trigger to do the init

1335 */

1336 vm_compaction_swapper_do_init();

1337

1338 /*

1339 * Any swapfile structure ready for re-use?

1340 */

1341

1342 lck_mtx_lock(&vm_swap_data_lock);

1343

         swf = (struct swapfile*) queue_first(&swf_global_queue);

1345

         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

                 if (swf->swp_flags == SWAP_REUSE) {

1348 swap_file_reuse = TRUE;

1349 break;

1350 }

                 swf = (struct swapfile*) queue_next(&swf->swp_queue);

1352 }

1353

1354 lck_mtx_unlock(&vm_swap_data_lock);

1355

1356 if (swap_file_reuse == FALSE) {

                 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

1358

                 swf = kalloc_flags(sizeof *swf, Z_WAITOK | Z_ZERO);

                 swf->swp_index = vm_num_swap_files + 1;

1361 swf->swp_pathlen = namelen;

                 swf->swp_path = kheap_alloc(KHEAP_DATA_BUFFERS, swf->swp_pathlen,

1363 Z_WAITOK | Z_ZERO);

1364

                 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);

1366 }

1367

         vm_swapfile_open(swf->swp_path, &swf->swp_vp);

1369

         if (swf->swp_vp == NULL) {

1371 if (swap_file_reuse == FALSE) {

                         kheap_free(KHEAP_DATA_BUFFERS, swf->swp_path, swf->swp_pathlen);

                         kfree(swf, sizeof *swf);

1374 }

1375 return FALSE;

1376 }

1377 vm_swapfile_can_be_created = TRUE;

1378

1379 size = MAX_SWAP_FILE_SIZE;

1380

1381 while (size >= MIN_SWAP_FILE_SIZE) {

1382 swap_file_pin = VM_SWAP_SHOULD_PIN(size);

1383

                 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {

1385 int num_bytes_for_bitmap = 0;

1386

1387 swap_file_created = TRUE;

1388

1389 swf->swp_size = size;

                         swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);

1391 swf->swp_nseginuse = 0;

1392 swf->swp_free_hint = 0;

1393

                         num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);

1395 /*

1396 * Allocate a bitmap that describes the

1397 * number of segments held by this swapfile.

1398 */

                         swf->swp_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,

1400 num_bytes_for_bitmap, Z_WAITOK | Z_ZERO);

1401

                         swf->swp_csegs = kalloc_flags(swf->swp_nsegs * sizeof(c_segment_t),

1403 Z_WAITOK | Z_ZERO);

1404

1405 /*

1406 * passing a NULL trim_list into vnode_trim_list

1407 * will return ENOTSUP if trim isn't supported

1408 * and 0 if it is

1409 */

                         if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {

1411 swp_trim_supported = TRUE;

1412 }

1413

1414 lck_mtx_lock(&vm_swap_data_lock);

1415

1416 swf->swp_flags = SWAP_READY;

1417

1418 if (swap_file_reuse == FALSE) {

                                 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);

1420 }

1421

1422 vm_num_swap_files++;

1423

1424 vm_swapfile_total_segs_alloced += swf->swp_nsegs;

1425

1426 if (swap_file_pin == TRUE) {

1427 vm_num_pinned_swap_files++;

1428 swf->swp_flags |= SWAP_PINNED;

1429 vm_swappin_avail -= swf->swp_size;

1430 }

1431

1432 lck_mtx_unlock(&vm_swap_data_lock);

1433

1434 thread_wakeup((event_t) &vm_num_swap_files);

1435 #if !XNU_TARGET_OS_OSX

                         if (vm_num_swap_files == 1) {

                                 c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;

1438

1439 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {

1440 c_overage_swapped_limit /= 2;

1441 }

1442 }

1443 #endif /* !XNU_TARGET_OS_OSX */

1444 break;

1445 } else {

1446 size = size / 2;

1447 }

1448 }

1449 if (swap_file_created == FALSE) {

                 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

1451

1452 swf->swp_vp = NULL;

1453

1454 if (swap_file_reuse == FALSE) {

                         kheap_free(KHEAP_DATA_BUFFERS, swf->swp_path, swf->swp_pathlen);

                         kfree(swf, sizeof *swf);

1457 }

1458 }

1459 return swap_file_created;

1460 }

1461

 /*
  * Local prototype for vnode_put(); the functions below call it on a
  * swapfile vnode after a successful vnode_getwithref().
  * NOTE(review): one caller below casts the result to (void), which
  * suggests the real function may return a value -- confirm against the
  * defining header.
  */
 extern void vnode_put(struct vnode* vp);

1463 kern_return_t

 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)

1465 {

1466 struct swapfile *swf = NULL;

1467 uint64_t file_offset = 0;

1468 int retval = 0;

1469

         assert(c_seg->c_store.c_buffer);

1471

1472 lck_mtx_lock(&vm_swap_data_lock);

1473

1474 swf = vm_swapfile_for_handle(f_offset);

1475

         if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {

1477 vm_swap_get_failures++;

1478 retval = 1;

1479 goto done;

1480 }

1481 swf->swp_io_count++;

1482

1483 lck_mtx_unlock(&vm_swap_data_lock);

1484

1485 #if DEVELOPMENT || DEBUG

1486 C_SEG_MAKE_WRITEABLE(c_seg);

1487 #endif

1488 file_offset = (f_offset & SWAP_SLOT_MASK);

1489

         if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {

                 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);

1492 } else {

                 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);

1494 vnode_put(swf->swp_vp);

1495 }

1496

1497 #if DEVELOPMENT || DEBUG

1498 C_SEG_WRITE_PROTECT(c_seg);

1499 #endif

         if (retval == 0) {

                 counter_add(&vm_statistics_swapins, size >> PAGE_SHIFT);

1502 } else {

1503 vm_swap_get_failures++;

1504 }

1505

1506 /*

1507 * Free this slot in the swap structure.

1508 */

1509 vm_swap_free(f_offset);

1510

1511 lck_mtx_lock(&vm_swap_data_lock);

1512 swf->swp_io_count--;

1513

         if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

1515 swf->swp_flags &= ~SWAP_WANTED;

                 thread_wakeup((event_t) &swf->swp_flags);

1517 }

1518 done:

1519 lck_mtx_unlock(&vm_swap_data_lock);

1520

         if (retval == 0) {

1522 return KERN_SUCCESS;

1523 } else {

1524 return KERN_FAILURE;

1525 }

1526 }

1527

1528 kern_return_t

 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)

1530 {

1531 unsigned int segidx = 0;

1532 struct swapfile *swf = NULL;

1533 uint64_t file_offset = 0;

1534 uint64_t swapfile_index = 0;

1535 unsigned int byte_for_segidx = 0;

1536 unsigned int offset_within_byte = 0;

1537 boolean_t swf_eligible = FALSE;

1538 boolean_t waiting = FALSE;

1539 boolean_t retried = FALSE;

1540 int error = 0;

1541 clock_sec_t sec;

1542 clock_nsec_t nsec;

1543 void *upl_ctx = NULL;

1544 boolean_t drop_iocount = FALSE;

1545

         if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {

1547 return KERN_FAILURE;

1548 }

1549 retry:

1550 lck_mtx_lock(&vm_swap_data_lock);

1551

         swf = (struct swapfile*) queue_first(&swf_global_queue);

1553

         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

1555 segidx = swf->swp_free_hint;

1556

                 swf_eligible =  (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

1558

1559 if (swf_eligible) {

                         while (segidx < swf->swp_nsegs) {

1561 byte_for_segidx = segidx >> 3;

1562 offset_within_byte = segidx % 8;

1563

                                 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

1565 segidx++;

1566 continue;

1567 }

1568

                                 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);

1570

1571 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;

1572 swf->swp_nseginuse++;

1573 swf->swp_io_count++;

1574 swf->swp_csegs[segidx] = c_seg;

1575

1576 swapfile_index = swf->swp_index;

1577 vm_swapfile_total_segs_used++;

1578

1579 clock_get_system_nanotime(&sec, &nsec);

1580

                                 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {

1582 thread_wakeup((event_t) &vm_swapfile_create_needed);

1583 }

1584

1585 lck_mtx_unlock(&vm_swap_data_lock);

1586

1587 goto issue_io;

1588 }

1589 }

                 swf = (struct swapfile*) queue_next(&swf->swp_queue);

1591 }

         assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

1593

1594 /*

1595 * we've run out of swap segments, but may not

1596 * be in a position to immediately create a new swap

1597 * file if we've recently failed to create due to a lack

1598 * of free space in the root filesystem... we'll try

1599 * to kick that create off, but in any event we're going

1600 * to take a breather (up to 1 second) so that we're not caught in a tight

1601 * loop back in "vm_compressor_compact_and_swap" trying to stuff

1602 * segments into swap files only to have them immediately put back

1603 * on the c_age queue due to vm_swap_put failing.

1604 *

1605 * if we're doing these puts due to a hibernation flush,

1606 * no need to block... setting hibernate_no_swapspace to TRUE,

1607 * will cause "vm_compressor_compact_and_swap" to immediately abort

1608 */

1609 clock_get_system_nanotime(&sec, &nsec);

1610

         if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {

1612 thread_wakeup((event_t) &vm_swapfile_create_needed);

1613 }

1614

         if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {

1616 waiting = TRUE;

                 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);

1618 } else {

1619 hibernate_no_swapspace = TRUE;

1620 }

1621

1622 lck_mtx_unlock(&vm_swap_data_lock);

1623

1624 if (waiting == TRUE) {

1625 thread_block(THREAD_CONTINUE_NULL);

1626

                 if (retried == FALSE && hibernate_flushing == TRUE) {

1628 retried = TRUE;

1629 goto retry;

1630 }

1631 }

1632 vm_swap_put_failures_no_swap_file++;

1633

1634 return KERN_FAILURE;

1635

1636 issue_io:

1637 assert(c_seg->c_busy_swapping);

1638 assert(c_seg->c_busy);

1639 assert(!c_seg->c_on_minorcompact_q);

1640

         *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

1642

1643 if (soc) {

1644 soc->swp_c_seg = c_seg;

1645 soc->swp_c_size = size;

1646

1647 soc->swp_swf = swf;

1648

1649 soc->swp_io_error = 0;

1650 soc->swp_io_done = 0;

1651

                 upl_ctx = (void *)&soc->swp_upl_ctx;

1653 }

1654

         if ((error = vnode_getwithref(swf->swp_vp)) != 0) {

                 printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);

1657 } else {

                 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);

1659 drop_iocount = TRUE;

1660 }

1661

         if (error || upl_ctx == NULL) {

                 return vm_swap_put_finish(swf, f_offset, error, drop_iocount);

1664 }

1665

1666 return KERN_SUCCESS;

1667 }

1668

1669 kern_return_t

 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)

1671 {

1672 if (drop_iocount) {

1673 vnode_put(swf->swp_vp);

1674 }

1675

1676 lck_mtx_lock(&vm_swap_data_lock);

1677

1678 swf->swp_io_count--;

1679

         if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

1681 swf->swp_flags &= ~SWAP_WANTED;

                 thread_wakeup((event_t) &swf->swp_flags);

1683 }

1684 lck_mtx_unlock(&vm_swap_data_lock);

1685

1686 if (error) {

1687 vm_swap_free(*f_offset);

1688 vm_swap_put_failures++;

1689

1690 return KERN_FAILURE;

1691 }

1692 return KERN_SUCCESS;

1693 }

1694

1695

1696 static void

 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)

1698 {

1699 uint64_t file_offset = 0;

1700 unsigned int segidx = 0;

1701

1702

         if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {

1704 unsigned int byte_for_segidx = 0;

1705 unsigned int offset_within_byte = 0;

1706

1707 file_offset = (f_offset & SWAP_SLOT_MASK);

                 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);

1709

1710 byte_for_segidx = segidx >> 3;

1711 offset_within_byte = segidx % 8;

1712

                 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

                         (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

1715

1716 swf->swp_csegs[segidx] = NULL;

1717

1718 swf->swp_nseginuse--;

1719 vm_swapfile_total_segs_used--;

1720

                         if (segidx < swf->swp_free_hint) {

1722 swf->swp_free_hint = segidx;

1723 }

1724 }

                 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {

1726 thread_wakeup((event_t) &vm_swapfile_gc_needed);

1727 }

1728 }

1729 }

1730

1731

/* Number of times vm_swap_free() released a slot immediately via vm_swap_free_now(). */
1732 uint32_t vm_swap_free_now_count = 0;

/* Number of times vm_swap_free() queued a slot on a swapfile's delayed-trim list instead. */
1733 uint32_t vm_swap_free_delayed_count = 0;

1734

1735

1736 void

1737 vm_swap_free(uint64_t f_offset)

1738 {

1739 struct swapfile *swf = NULL;

1740 struct trim_list *tl = NULL;

1741 clock_sec_t sec;

1742 clock_nsec_t nsec;

1743

1744 if (swp_trim_supported == TRUE) {

                 tl = kalloc(sizeof(struct trim_list));

1746 }

1747

1748 lck_mtx_lock(&vm_swap_data_lock);

1749

1750 swf = vm_swapfile_for_handle(f_offset);

1751

         if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {

                 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {

1754 /*

1755 * don't delay the free if the underlying disk doesn't support

1756 * trim, or we're in the midst of reclaiming this swap file since

1757 * we don't want to move segments that are technically free

1758 * but not yet handled by the delayed free mechanism

1759 */

1760 vm_swap_free_now(swf, f_offset);

1761

1762 vm_swap_free_now_count++;

1763 goto done;

1764 }

1765 tl->tl_offset = f_offset & SWAP_SLOT_MASK;

1766 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;

1767

1768 tl->tl_next = swf->swp_delayed_trim_list_head;

1769 swf->swp_delayed_trim_list_head = tl;

1770 swf->swp_delayed_trim_count++;

1771 tl = NULL;

1772

                 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {

1774 clock_get_system_nanotime(&sec, &nsec);

1775

1776 if (sec > dont_trim_until_ts) {

1777 thread_wakeup((event_t) &vm_swapfile_create_needed);

1778 }

1779 }

1780 vm_swap_free_delayed_count++;

1781 }

1782 done:

1783 lck_mtx_unlock(&vm_swap_data_lock);

1784

1785 if (tl != NULL) {

                 kfree(tl, sizeof(struct trim_list));

1787 }

1788 }

1789

1790

1791 static void

1792 vm_swap_wait_on_trim_handling_in_progress()

1793 {

1794 while (delayed_trim_handling_in_progress == TRUE) {

                 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);

1796 lck_mtx_unlock(&vm_swap_data_lock);

1797

1798 thread_block(THREAD_CONTINUE_NULL);

1799

1800 lck_mtx_lock(&vm_swap_data_lock);

1801 }

1802 }

1803

1804

1805 static void

1806 vm_swap_handle_delayed_trims(boolean_t force_now)

1807 {

1808 struct swapfile *swf = NULL;

1809

1810 /*

1811 * serialize the race between us and vm_swap_reclaim...

1812 * if vm_swap_reclaim wins it will turn off SWAP_READY

1813 * on the victim it has chosen... we can just skip over

1814 * that file since vm_swap_reclaim will first process

1815 * all of the delayed trims associated with it

1816 */

1817

1818 if (compressor_store_stop_compaction == TRUE) {

1819 return;

1820 }

1821

1822 lck_mtx_lock(&vm_swap_data_lock);

1823

1824 delayed_trim_handling_in_progress = TRUE;

1825

1826 lck_mtx_unlock(&vm_swap_data_lock);

1827

1828 /*

1829 * no need to hold the lock to walk the swf list since

1830 * vm_swap_create (the only place where we add to this list)

1831 * is run on the same thread as this function

1832 * and vm_swap_reclaim doesn't remove items from this list

1833 * instead marking them with SWAP_REUSE for future re-use

1834 */

         swf = (struct swapfile*) queue_first(&swf_global_queue);

1836

         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

                 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {

                         assert(!(swf->swp_flags & SWAP_RECLAIM));

1840 vm_swap_do_delayed_trim(swf);

1841 }

                 swf = (struct swapfile*) queue_next(&swf->swp_queue);

1843 }

1844 lck_mtx_lock(&vm_swap_data_lock);

1845

1846 delayed_trim_handling_in_progress = FALSE;

1847 thread_wakeup((event_t) &delayed_trim_handling_in_progress);

1848

         if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {

1850 thread_wakeup((event_t) &vm_swapfile_gc_needed);

1851 }

1852

1853 lck_mtx_unlock(&vm_swap_data_lock);

1854 }

1855

1856 static void

 vm_swap_do_delayed_trim(struct swapfile *swf)

1858 {

1859 struct trim_list *tl, *tl_head;

1860 int error;

1861

1862 if (compressor_store_stop_compaction == TRUE) {

1863 return;

1864 }

1865

         if ((error = vnode_getwithref(swf->swp_vp)) != 0) {

                 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);

1868 return;

1869 }

1870

1871 lck_mtx_lock(&vm_swap_data_lock);

1872

1873 tl_head = swf->swp_delayed_trim_list_head;

1874 swf->swp_delayed_trim_list_head = NULL;

1875 swf->swp_delayed_trim_count = 0;

1876

1877 lck_mtx_unlock(&vm_swap_data_lock);

1878

         vnode_trim_list(swf->swp_vp, tl_head, TRUE);

1880

         (void) vnode_put(swf->swp_vp);

1882

         while ((tl = tl_head) != NULL) {

1884 unsigned int segidx = 0;

1885 unsigned int byte_for_segidx = 0;

1886 unsigned int offset_within_byte = 0;

1887

1888 lck_mtx_lock(&vm_swap_data_lock);

1889

                 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);

1891

1892 byte_for_segidx = segidx >> 3;

1893 offset_within_byte = segidx % 8;

1894

                 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

                         (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

1897

1898 swf->swp_csegs[segidx] = NULL;

1899

1900 swf->swp_nseginuse--;

1901 vm_swapfile_total_segs_used--;

1902

                         if (segidx < swf->swp_free_hint) {

1904 swf->swp_free_hint = segidx;

1905 }

1906 }

1907 lck_mtx_unlock(&vm_swap_data_lock);

1908

1909 tl_head = tl->tl_next;

1910

                 kfree(tl, sizeof(struct trim_list));

1912 }

1913 }

1914

1915

/*
 * No-op: this function performs no work and returns immediately.
 */
void
vm_swap_flush()
{
}

1921

/* Incremented by vm_swap_reclaim() each time it breaks out of its segment walk early (compaction stopped, abort-reclaim condition, or swap busy). */
1922 int vm_swap_reclaim_yielded = 0;

1923

1924 void

1925 vm_swap_reclaim(void)

1926 {

1927 vm_offset_t addr = 0;

1928 unsigned int segidx = 0;

1929 uint64_t f_offset = 0;

1930 struct swapfile *swf = NULL;

1931 struct swapfile *smallest_swf = NULL;

1932 unsigned int min_nsegs = 0;

1933 unsigned int byte_for_segidx = 0;

1934 unsigned int offset_within_byte = 0;

1935 uint32_t c_size = 0;

1936

1937 c_segment_t c_seg = NULL;

1938

         if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {

                 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");

1941 }

1942

1943 lck_mtx_lock(&vm_swap_data_lock);

1944

1945 /*

1946 * if we're running the swapfile list looking for

1947 * candidates with delayed trims, we need to

1948 * wait before making our decision concerning

1949 * the swapfile we want to reclaim

1950 */

1951 vm_swap_wait_on_trim_handling_in_progress();

1952

1953 /*

1954 * from here until we knock down the SWAP_READY bit,

1955 * we need to remain behind the vm_swap_data_lock...

1956 * once that bit has been turned off, "vm_swap_handle_delayed_trims"

1957 * will not consider this swapfile for processing

1958 */

         swf = (struct swapfile*) queue_first(&swf_global_queue);

1960 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;

1961 smallest_swf = NULL;

1962

         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

                 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {

1965 smallest_swf = swf;

1966 min_nsegs = swf->swp_nseginuse;

1967 }

                 swf = (struct swapfile*) queue_next(&swf->swp_queue);

1969 }

1970

1971 if (smallest_swf == NULL) {

1972 goto done;

1973 }

1974

1975 swf = smallest_swf;

1976

1977

1978 swf->swp_flags &= ~SWAP_READY;

1979 swf->swp_flags |= SWAP_RECLAIM;

1980

1981 if (swf->swp_delayed_trim_count) {

1982 lck_mtx_unlock(&vm_swap_data_lock);

1983

1984 vm_swap_do_delayed_trim(swf);

1985

1986 lck_mtx_lock(&vm_swap_data_lock);

1987 }

1988 segidx = 0;

1989

         while (segidx < swf->swp_nsegs) {

1991 ReTry_for_cseg:

1992 /*

1993 * Wait for outgoing I/Os.

1994 */

1995 while (swf->swp_io_count) {

1996 swf->swp_flags |= SWAP_WANTED;

1997

                         assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);

1999 lck_mtx_unlock(&vm_swap_data_lock);

2000

2001 thread_block(THREAD_CONTINUE_NULL);

2002

2003 lck_mtx_lock(&vm_swap_data_lock);

2004 }

                 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {

2006 vm_swap_reclaim_yielded++;

2007 break;

2008 }

2009

2010 byte_for_segidx = segidx >> 3;

2011 offset_within_byte = segidx % 8;

2012

                 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {

2014 segidx++;

2015 continue;

2016 }

2017

2018 c_seg = swf->swp_csegs[segidx];

2019 assert(c_seg);

2020

2021 lck_mtx_lock_spin_always(&c_seg->c_lock);

2022

2023 if (c_seg->c_busy) {

2024 /*

2025 * a swapped out c_segment in the process of being freed will remain in the

2026 * busy state until after the vm_swap_free is called on it... vm_swap_free

2027 * takes the vm_swap_data_lock, so can't change the swap state until after

2028 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete

2029 * which will allow c_seg_free_locked to clear busy and wake up this thread...

2030 * at that point, we re-look up the swap state which will now indicate that

2031 * this c_segment no longer exists.

2032 */

2033 c_seg->c_wanted = 1;

2034

                         assert_wait((event_t) (c_seg), THREAD_UNINT);

2036 lck_mtx_unlock_always(&c_seg->c_lock);

2037

2038 lck_mtx_unlock(&vm_swap_data_lock);

2039

2040 thread_block(THREAD_CONTINUE_NULL);

2041

2042 lck_mtx_lock(&vm_swap_data_lock);

2043

2044 goto ReTry_for_cseg;

2045 }

                 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

2047

2048 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;

2049

                 assert(c_seg == swf->swp_csegs[segidx]);

2051 swf->swp_csegs[segidx] = NULL;

2052 swf->swp_nseginuse--;

2053

2054 vm_swapfile_total_segs_used--;

2055

2056 lck_mtx_unlock(&vm_swap_data_lock);

2057

                 assert(C_SEG_IS_ONDISK(c_seg));

2059

2060 C_SEG_BUSY(c_seg);

2061 c_seg->c_busy_swapping = 1;

2062 #if !CHECKSUM_THE_SWAP

2063 c_seg_trim_tail(c_seg);

2064 #endif

                 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

2066

                 assert(c_size <= C_SEG_BUFSIZE && c_size);

2068

2069 lck_mtx_unlock_always(&c_seg->c_lock);

2070

                 if (vnode_getwithref(swf->swp_vp)) {

                         printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");

2073 vm_swap_get_failures++;

2074 goto swap_io_failed;

2075 } else {

                         if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {

2077 /*

2078 * reading the data back in failed, so convert c_seg

2079 * to a swapped in c_segment that contains no data

2080 */

                                 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);

2082 /*

2083 * returns with c_busy_swapping cleared

2084 */

2085 vnode_put(swf->swp_vp);

2086 vm_swap_get_failures++;

2087 goto swap_io_failed;

2088 }

2089 vnode_put(swf->swp_vp);

2090 }

2091

                 counter_add(&vm_statistics_swapins, c_size >> PAGE_SHIFT);

2093

                 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {

2095 vm_offset_t c_buffer;

2096

2097 /*

2098 * the put failed, so convert c_seg to a fully swapped in c_segment

2099 * with valid data

2100 */

                         c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);

2102

                         kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);

2104

                         memcpy((char *)c_buffer, (char *)addr, c_size);

2106

                         c_seg->c_store.c_buffer = (int32_t *)c_buffer;

2108 #if ENCRYPTED_SWAP

2109 vm_swap_decrypt(c_seg);

2110 #endif /* ENCRYPTED_SWAP */

                         c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);

2112 /*

2113 * returns with c_busy_swapping cleared

2114 */

                         OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

2116

2117 goto swap_io_failed;

2118 }

                 counter_add(&vm_statistics_swapouts, c_size >> PAGE_SHIFT);

2120

2121 lck_mtx_lock_spin_always(&c_seg->c_lock);

2122

                 assert(C_SEG_IS_ONDISK(c_seg));

2124 /*

2125 * The c_seg will now know about the new location on disk.

2126 */

2127 c_seg->c_store.c_swap_handle = f_offset;

2128

2129 assert(c_seg->c_busy_swapping);

2130 c_seg->c_busy_swapping = 0;

2131 swap_io_failed:

2132 assert(c_seg->c_busy);

2133 C_SEG_WAKEUP_DONE(c_seg);

2134

2135 lck_mtx_unlock_always(&c_seg->c_lock);

2136 lck_mtx_lock(&vm_swap_data_lock);

2137 }

2138

2139 if (swf->swp_nseginuse) {

2140 swf->swp_flags &= ~SWAP_RECLAIM;

2141 swf->swp_flags |= SWAP_READY;

2142

2143 goto done;

2144 }

2145 /*

2146 * We don't remove this inactive swf from the queue.

2147 * That way, we can re-use it when needed again and

2148 * preserve the namespace. The delayed_trim processing

2149 * is also dependent on us not removing swfs from the queue.

2150 */

2151 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

2152

2153 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

2154

2155 lck_mtx_unlock(&vm_swap_data_lock);

2156

         vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

2158

         kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));

         kheap_free(KHEAP_DATA_BUFFERS, swf->swp_bitmap,

             MAX((swf->swp_nsegs >> 3), 1));

2162

2163 lck_mtx_lock(&vm_swap_data_lock);

2164

         if (swf->swp_flags & SWAP_PINNED) {

2166 vm_num_pinned_swap_files--;

2167 vm_swappin_avail += swf->swp_size;

2168 }

2169

2170 swf->swp_vp = NULL;

2171 swf->swp_size = 0;

2172 swf->swp_free_hint = 0;

2173 swf->swp_nsegs = 0;

2174 swf->swp_flags = SWAP_REUSE;

2175

2176 vm_num_swap_files--;

2177

2178 done:

         thread_wakeup((event_t) &swf->swp_flags);

2180 lck_mtx_unlock(&vm_swap_data_lock);

2181

         kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);

2183 }

2184

2185

2186 uint64_t

2187 vm_swap_get_total_space(void)

2188 {

2189 uint64_t total_space = 0;

2190

         total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;

2192

2193 return total_space;

2194 }

2195

2196 uint64_t

2197 vm_swap_get_used_space(void)

2198 {

2199 uint64_t used_space = 0;

2200

         used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;

2202

2203 return used_space;

2204 }

2205

/*
 * Swap space that is allocated but not in use, i.e. the difference between
 * vm_swap_get_total_space() and vm_swap_get_used_space().
 */
uint64_t
vm_swap_get_free_space(void)
{
	uint64_t allocated = vm_swap_get_total_space();
	uint64_t consumed = vm_swap_get_used_space();

	return allocated - consumed;
}

2211

2212 uint64_t

2213 vm_swap_get_max_configured_space(void)

2214 {

         int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);

2216 return num_swap_files * MAX_SWAP_FILE_SIZE;

2217 }

2218

2219 int

2220 vm_swap_low_on_space(void)

2221 {

         if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {

2223 return 0;

2224 }

2225

         if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {

                 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {

2228 return 0;

2229 }

2230

2231 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {

2232 return 1;

2233 }

2234 }

2235 return 0;

2236 }

2237

2238 int

2239 vm_swap_out_of_space(void)

2240 {

2241 if ((vm_num_swap_files == vm_num_swap_files_config) &&

2242 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {

2243 /*

2244 * Last swapfile and we have only space for the

2245 * last few swapouts.

2246 */

2247 return 1;

2248 }

2249

2250 return 0;

2251 }

2252

2253 boolean_t

2254 vm_swap_files_pinned(void)

2255 {

2256 boolean_t result;

2257

2258 if (vm_swappin_enabled == FALSE) {

2259 return TRUE;

2260 }

2261

2262 result = (vm_num_pinned_swap_files == vm_num_swap_files);

2263

2264 return result;

2265 }

2266

2267 #if CONFIG_FREEZE

2268 boolean_t

 vm_swap_max_budget(uint64_t *freeze_daily_budget)

2270 {

2271 boolean_t use_device_value = FALSE;

2272 struct swapfile *swf = NULL;

2273

2274 if (vm_num_swap_files) {

2275 lck_mtx_lock(&vm_swap_data_lock);

2276

                 swf = (struct swapfile*) queue_first(&swf_global_queue);

2278

2279 if (swf) {

                         while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

                                 if (swf->swp_flags == SWAP_READY) {

2282 assert(swf->swp_vp);

2283

                                         if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {

2285 use_device_value = TRUE;

2286 }

2287 break;

2288 }

                                 swf = (struct swapfile*) queue_next(&swf->swp_queue);

2290 }

2291 }

2292

2293 lck_mtx_unlock(&vm_swap_data_lock);

2294 } else {

2295 /*

2296 * This block is used for the initial budget value before any swap files

2297 * are created. We create a temp swap file to get the budget.

2298 */

2299

2300 struct vnode *temp_vp = NULL;

2301

2302 vm_swapfile_open(swapfilename, &temp_vp);

2303

2304 if (temp_vp) {

                         if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {

2306 use_device_value = TRUE;

2307 }

2308

                         vm_swapfile_close((uint64_t)&swapfilename, temp_vp);

2310 temp_vp = NULL;

2311 } else {

2312 *freeze_daily_budget = 0;

2313 }

2314 }

2315

2316 return use_device_value;

2317 }

2318 #endif /* CONFIG_FREEZE */