1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <vm/vm_compressor.h>
30
31#if CONFIG_PHANTOM_CACHE
32#include <vm/vm_phantom_cache.h>
33#endif
34
35#include <vm/vm_map.h>
36#include <vm/vm_pageout.h>
37#include <vm/memory_object.h>
38#include <vm/vm_compressor_algorithms.h>
39#include <vm/vm_fault.h>
40#include <vm/vm_protos.h>
41#include <mach/mach_host.h> /* for host_info() */
42#include <kern/ledger.h>
43#include <kern/policy_internal.h>
44#include <kern/thread_group.h>
45#include <san/kasan.h>
46
47#if !CONFIG_EMBEDDED
48#include <i386/misc_protos.h>
49#endif
50
51#include <IOKit/IOHibernatePrivate.h>
52
53#if POPCOUNT_THE_COMPRESSED_DATA
54boolean_t popcount_c_segs = TRUE;
55
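/*
 * Debug helper used when POPCOUNT_THE_COMPRESSED_DATA is enabled: computes
 * the population count (number of set bits) over sz bytes starting at ins,
 * walking the buffer in 16-byte, then 4-byte, then single-byte strides.
 * When popcount validation has been disabled (see vm_compressor_init), a
 * sentinel value is returned instead.
 */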
56static inline uint32_t vmc_pop(uintptr_t ins, int sz) {
57 uint32_t rv = 0;
58
59 if (__probable(popcount_c_segs == FALSE)) {
60 return 0xDEAD707C;
61 }
62
63 while (sz >= 16) {
64 uint32_t rv1, rv2;
65 uint64_t *ins64 = (uint64_t *) ins;
66 uint64_t *ins642 = (uint64_t *) (ins + 8);
67 rv1 = __builtin_popcountll(*ins64);
68 rv2 = __builtin_popcountll(*ins642);
69 rv += rv1 + rv2;
70 sz -= 16;
71 ins += 16;
72 }
73
74 while (sz >= 4) {
75 uint32_t *ins32 = (uint32_t *) ins;
76 rv += __builtin_popcount(*ins32);
77 sz -= 4;
78 ins += 4;
79 }
80
81 while (sz > 0) {
82 char *ins8 = (char *)ins;
83 rv += __builtin_popcount(*ins8);
84 sz--;
85 ins++;
86 }
87 return rv;
88}
89#endif
90
91/*
 92 * vm_compressor_mode has a hierarchy of control to set its value.
93 * boot-args are checked first, then device-tree, and finally
94 * the default value that is defined below. See vm_fault_init() for
95 * the boot-arg & device-tree code.
96 */
97
98#if CONFIG_EMBEDDED
99
100#if CONFIG_FREEZE
101int vm_compressor_mode = VM_PAGER_FREEZER_DEFAULT;
102
103void *freezer_chead; /* The chead used to track c_segs allocated for the exclusive use of holding just one task's compressed memory.*/
104char *freezer_compressor_scratch_buf = NULL;
105
106#define VM_MAX_FREEZER_CSEG_SWAP_COUNT 64 /* The maximum number of c_segs holding just one task's compressed memory that can be swapped out to disk.*/
107extern int c_freezer_swapout_count; /* This count keeps track of the # of c_segs holding just one task's compressed memory on the swapout queue. This count is used during each freeze i.e. on a per-task basis.*/
108
109#else /* CONFIG_FREEZE */
110int vm_compressor_mode = VM_PAGER_NOT_CONFIGURED;
111#endif /* CONFIG_FREEZE */
112
113int vm_scale = 1;
114
115#else /* CONFIG_EMBEDDED */
116int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;
117int vm_scale = 16;
118
119#endif /* CONFIG_EMBEDDED */
120
121int vm_compressor_is_active = 0;
122int vm_compression_limit = 0;
123int vm_compressor_available = 0;
124
125extern void vm_pageout_io_throttle(void);
126
127#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA
128extern unsigned int hash_string(char *cp, int len);
129static unsigned int vmc_hash(char *, int);
130boolean_t checksum_c_segs = TRUE;
131
132unsigned int vmc_hash(char *cp, int len) {
133 if (__probable(checksum_c_segs == FALSE)) {
134 return 0xDEAD7A37;
135 }
136 return hash_string(cp, len);
137}
138#endif
139
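/*
 * a slot's c_size field cannot represent a full PAGE_SIZE value, so a
 * page-sized (incompressible) entry is stored as PAGE_SIZE - 1 and
 * translated back by UNPACK_C_SIZE
 */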
140#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size)
141#define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
142
143
144struct c_sv_hash_entry {
145 union {
146 struct {
147 uint32_t c_sv_he_ref;
148 uint32_t c_sv_he_data;
149 } c_sv_he;
150 uint64_t c_sv_he_record;
151
152 } c_sv_he_un;
153};
154
155#define he_ref c_sv_he_un.c_sv_he.c_sv_he_ref
156#define he_data c_sv_he_un.c_sv_he.c_sv_he_data
157#define he_record c_sv_he_un.c_sv_he_record
158
159#define C_SV_HASH_MAX_MISS 32
160#define C_SV_HASH_SIZE ((1 << 10))
161#define C_SV_HASH_MASK ((1 << 10) - 1)
162#define C_SV_CSEG_ID ((1 << 22) - 1)
163
164
165union c_segu {
166 c_segment_t c_seg;
167 uintptr_t c_segno;
168};
169
170
171
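/*
 * slot-mapping pointers are 4-byte aligned and live above
 * KERNEL_PMAP_HEAP_RANGE_START, so they can be stored compactly as that
 * offset shifted right by 2 (the c_packed_ptr field)
 */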
172#define C_SLOT_PACK_PTR(ptr) (((uintptr_t)ptr - (uintptr_t) KERNEL_PMAP_HEAP_RANGE_START) >> 2)
173#define C_SLOT_UNPACK_PTR(cslot) ((uintptr_t)(cslot->c_packed_ptr << 2) + (uintptr_t) KERNEL_PMAP_HEAP_RANGE_START)
174
175
176uint32_t c_segment_count = 0;
177uint32_t c_segment_count_max = 0;
178
179uint64_t c_generation_id = 0;
180uint64_t c_generation_id_flush_barrier;
181
182
183#define HIBERNATE_FLUSHING_SECS_TO_COMPLETE 120
184
185boolean_t hibernate_no_swapspace = FALSE;
186clock_sec_t hibernate_flushing_deadline = 0;
187
188
189#if RECORD_THE_COMPRESSED_DATA
190char *c_compressed_record_sbuf;
191char *c_compressed_record_ebuf;
192char *c_compressed_record_cptr;
193#endif
194
195
196queue_head_t c_age_list_head;
197queue_head_t c_swapout_list_head;
198queue_head_t c_swappedin_list_head;
199queue_head_t c_swappedout_list_head;
200queue_head_t c_swappedout_sparse_list_head;
201queue_head_t c_major_list_head;
202queue_head_t c_filling_list_head;
203queue_head_t c_bad_list_head;
204
205uint32_t c_age_count = 0;
206uint32_t c_swapout_count = 0;
207uint32_t c_swappedin_count = 0;
208uint32_t c_swappedout_count = 0;
209uint32_t c_swappedout_sparse_count = 0;
210uint32_t c_major_count = 0;
211uint32_t c_filling_count = 0;
212uint32_t c_empty_count = 0;
213uint32_t c_bad_count = 0;
214
215
216queue_head_t c_minor_list_head;
217uint32_t c_minor_count = 0;
218
219int c_overage_swapped_count = 0;
220int c_overage_swapped_limit = 0;
221
222int c_seg_fixed_array_len;
223union c_segu *c_segments;
224vm_offset_t c_buffers;
225vm_size_t c_buffers_size;
226caddr_t c_segments_next_page;
227boolean_t c_segments_busy;
228uint32_t c_segments_available;
229uint32_t c_segments_limit;
230uint32_t c_segments_nearing_limit;
231
232uint32_t c_segment_svp_in_hash;
233uint32_t c_segment_svp_hash_succeeded;
234uint32_t c_segment_svp_hash_failed;
235uint32_t c_segment_svp_zero_compressions;
236uint32_t c_segment_svp_nonzero_compressions;
237uint32_t c_segment_svp_zero_decompressions;
238uint32_t c_segment_svp_nonzero_decompressions;
239
240uint32_t c_segment_noncompressible_pages;
241
242uint32_t c_segment_pages_compressed;
243uint32_t c_segment_pages_compressed_limit;
244uint32_t c_segment_pages_compressed_nearing_limit;
245uint32_t c_free_segno_head = (uint32_t)-1;
246
247uint32_t vm_compressor_minorcompact_threshold_divisor = 10;
248uint32_t vm_compressor_majorcompact_threshold_divisor = 10;
249uint32_t vm_compressor_unthrottle_threshold_divisor = 10;
250uint32_t vm_compressor_catchup_threshold_divisor = 10;
251
252#define C_SEGMENTS_PER_PAGE (PAGE_SIZE / sizeof(union c_segu))
253
254
255lck_grp_attr_t vm_compressor_lck_grp_attr;
256lck_attr_t vm_compressor_lck_attr;
257lck_grp_t vm_compressor_lck_grp;
258lck_mtx_t *c_list_lock;
259lck_rw_t c_master_lock;
260boolean_t decompressions_blocked = FALSE;
261
262zone_t compressor_segment_zone;
263int c_compressor_swap_trigger = 0;
264
265uint32_t compressor_cpus;
266char *compressor_scratch_bufs;
267char *kdp_compressor_scratch_buf;
268char *kdp_compressor_decompressed_page;
269addr64_t kdp_compressor_decompressed_page_paddr;
270ppnum_t kdp_compressor_decompressed_page_ppnum;
271
272clock_sec_t start_of_sample_period_sec = 0;
273clock_nsec_t start_of_sample_period_nsec = 0;
274clock_sec_t start_of_eval_period_sec = 0;
275clock_nsec_t start_of_eval_period_nsec = 0;
276uint32_t sample_period_decompression_count = 0;
277uint32_t sample_period_compression_count = 0;
278uint32_t last_eval_decompression_count = 0;
279uint32_t last_eval_compression_count = 0;
280
281#define DECOMPRESSION_SAMPLE_MAX_AGE (60 * 30)
282
283boolean_t vm_swapout_ripe_segments = FALSE;
284uint32_t vm_ripe_target_age = (60 * 60 * 48);
285
286uint32_t swapout_target_age = 0;
287uint32_t age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE];
288uint32_t overage_decompressions_during_sample_period = 0;
289uint32_t vm_compressor_pages_grabbed = 0;
290
291
292void do_fastwake_warmup(queue_head_t *, boolean_t);
293boolean_t fastwake_warmup = FALSE;
294boolean_t fastwake_recording_in_progress = FALSE;
295clock_sec_t dont_trim_until_ts = 0;
296
297uint64_t c_segment_warmup_count;
298uint64_t first_c_segment_to_warm_generation_id = 0;
299uint64_t last_c_segment_to_warm_generation_id = 0;
300boolean_t hibernate_flushing = FALSE;
301
302int64_t c_segment_input_bytes __attribute__((aligned(8))) = 0;
303int64_t c_segment_compressed_bytes __attribute__((aligned(8))) = 0;
304int64_t compressor_bytes_used __attribute__((aligned(8))) = 0;
305
306
307struct c_sv_hash_entry c_segment_sv_hash_table[C_SV_HASH_SIZE] __attribute__ ((aligned (8)));
308
309static boolean_t compressor_needs_to_swap(void);
310static void vm_compressor_swap_trigger_thread(void);
311static void vm_compressor_do_delayed_compactions(boolean_t);
312static void vm_compressor_compact_and_swap(boolean_t);
313static void vm_compressor_age_swapped_in_segments(boolean_t);
314
315#if !CONFIG_EMBEDDED
316static void vm_compressor_take_paging_space_action(void);
317#endif
318
319void compute_swapout_target_age(void);
320
321boolean_t c_seg_major_compact(c_segment_t, c_segment_t);
322boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t);
323
324int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t);
325int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t);
326void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg);
327
328void c_seg_move_to_sparse_list(c_segment_t);
329void c_seg_insert_into_q(queue_head_t *, c_segment_t);
330
331uint64_t vm_available_memory(void);
332uint64_t vm_compressor_pages_compressed(void);
333
334/*
335 * indicate the need to do a major compaction if
336 * the overall set of in-use compression segments
337 * becomes sparse... on systems that support pressure
338 * driven swapping, this will also cause swapouts to
339 * be initiated.
340 */
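/*
 * "sparse" here means the in-core segments could hold noticeably more pages
 * than the compressor currently occupies: once the unused capacity across
 * in-core segments exceeds roughly 1/8th of their total capacity (and the
 * segment count is non-trivial), a major compaction is indicated
 */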
341static inline boolean_t vm_compressor_needs_to_major_compact()
342{
343 uint32_t incore_seg_count;
344
345 incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
346
347 if ((c_segment_count >= (c_segments_nearing_limit / 8)) &&
348 ((incore_seg_count * C_SEG_MAX_PAGES) - VM_PAGE_COMPRESSOR_COUNT) >
349 ((incore_seg_count / 8) * C_SEG_MAX_PAGES))
350 return (1);
351 return (0);
352}
353
354
355uint64_t
356vm_available_memory(void)
357{
358 return (((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64);
359}
360
361
362uint64_t
363vm_compressor_pages_compressed(void)
364{
365 return (c_segment_pages_compressed * PAGE_SIZE_64);
366}
367
368
369boolean_t
370vm_compressor_low_on_space(void)
371{
372 if ((c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) ||
373 (c_segment_count > c_segments_nearing_limit))
374 return (TRUE);
375
376 return (FALSE);
377}
378
379
380boolean_t
381vm_compressor_out_of_space(void)
382{
383 if ((c_segment_pages_compressed >= c_segment_pages_compressed_limit) ||
384 (c_segment_count >= c_segments_limit))
385 return (TRUE);
386
387 return (FALSE);
388}
389
390
391int
392vm_wants_task_throttled(task_t task)
393{
394 if (task == kernel_task)
395 return (0);
396
397 if (VM_CONFIG_SWAP_IS_ACTIVE) {
398 if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) &&
399 (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4))
400 return (1);
401 }
402 return (0);
403}
404
405
406#if DEVELOPMENT || DEBUG
407boolean_t kill_on_no_paging_space = FALSE; /* On compressor/swap exhaustion, kill the largest process regardless of
408 * its chosen process policy. Controlled by a boot-arg of the same name. */
409#endif /* DEVELOPMENT || DEBUG */
410
411#if !CONFIG_EMBEDDED
412
413static uint32_t no_paging_space_action_in_progress = 0;
414extern void memorystatus_send_low_swap_note(void);
415
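/*
 * run the "no paging space" action at most once at a time... the
 * compare-and-swap on no_paging_space_action_in_progress acts as a
 * non-blocking gate, so concurrent callers simply return while another
 * thread is handling it
 */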
416static void
417vm_compressor_take_paging_space_action(void)
418{
419 if (no_paging_space_action_in_progress == 0) {
420
421 if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) {
422
423 if (no_paging_space_action()) {
424#if DEVELOPMENT || DEBUG
425 if (kill_on_no_paging_space == TRUE) {
426 /*
427 * Since we are choosing to always kill a process, we don't need the
428 * "out of application memory" dialog box in this mode. And, hence we won't
429 * send the knote.
430 */
431 no_paging_space_action_in_progress = 0;
432 return;
433 }
434#endif /* DEVELOPMENT || DEBUG */
435 memorystatus_send_low_swap_note();
436 }
437
438 no_paging_space_action_in_progress = 0;
439 }
440 }
441}
442#endif /* !CONFIG_EMBEDDED */
443
444
445void
446vm_compressor_init_locks(void)
447{
448 lck_grp_attr_setdefault(&vm_compressor_lck_grp_attr);
449 lck_grp_init(&vm_compressor_lck_grp, "vm_compressor", &vm_compressor_lck_grp_attr);
450 lck_attr_setdefault(&vm_compressor_lck_attr);
451
452 lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
453}
454
455
456void
457vm_decompressor_lock(void)
458{
459 PAGE_REPLACEMENT_ALLOWED(TRUE);
460
461 decompressions_blocked = TRUE;
462
463 PAGE_REPLACEMENT_ALLOWED(FALSE);
464}
465
466void
467vm_decompressor_unlock(void)
468{
469 PAGE_REPLACEMENT_ALLOWED(TRUE);
470
471 decompressions_blocked = FALSE;
472
473 PAGE_REPLACEMENT_ALLOWED(FALSE);
474
475 thread_wakeup((event_t)&decompressions_blocked);
476}
477
478static inline void cslot_copy(c_slot_t cdst, c_slot_t csrc) {
479#if CHECKSUM_THE_DATA
480 cdst->c_hash_data = csrc->c_hash_data;
481#endif
482#if CHECKSUM_THE_COMPRESSED_DATA
483 cdst->c_hash_compressed_data = csrc->c_hash_compressed_data;
484#endif
485#if POPCOUNT_THE_COMPRESSED_DATA
486 cdst->c_pop_cdata = csrc->c_pop_cdata;
487#endif
488 cdst->c_size = csrc->c_size;
489 cdst->c_packed_ptr = csrc->c_packed_ptr;
490#if defined(__arm__) || defined(__arm64__)
491 cdst->c_codec = csrc->c_codec;
492#endif
493}
494
495vm_map_t compressor_map;
496uint64_t compressor_pool_max_size;
497uint64_t compressor_pool_size;
498uint32_t compressor_pool_multiplier;
499
500#if DEVELOPMENT || DEBUG
501/*
502 * Compressor segments are write-protected in development/debug
503 * kernels to help debug memory corruption.
504 * In cases where performance is a concern, this can be disabled
505 * via the boot-arg "-disable_cseg_write_protection".
506 */
507boolean_t write_protect_c_segs = TRUE;
508int vm_compressor_test_seg_wp;
509uint32_t vm_ktrace_enabled;
510#endif /* DEVELOPMENT || DEBUG */
511
512void
513vm_compressor_init(void)
514{
515 thread_t thread;
516 struct c_slot cs_dummy;
517 c_slot_t cs = &cs_dummy;
518 int c_segment_min_size;
519 int c_segment_padded_size;
520 int attempts = 1;
521 kern_return_t retval = KERN_SUCCESS;
522 vm_offset_t start_addr = 0;
523 vm_size_t c_segments_arr_size = 0, compressor_submap_size = 0;
524 vm_map_kernel_flags_t vmk_flags;
525#if RECORD_THE_COMPRESSED_DATA
526 vm_size_t c_compressed_record_sbuf_size = 0;
527#endif /* RECORD_THE_COMPRESSED_DATA */
528
529#if DEVELOPMENT || DEBUG
530 char bootarg_name[32];
531 if (PE_parse_boot_argn("-kill_on_no_paging_space", bootarg_name, sizeof (bootarg_name))) {
532 kill_on_no_paging_space = TRUE;
533 }
534 if (PE_parse_boot_argn("-disable_cseg_write_protection", bootarg_name, sizeof (bootarg_name))) {
535 write_protect_c_segs = FALSE;
536 }
537 int vmcval = 1;
538 PE_parse_boot_argn("vm_compressor_validation", &vmcval, sizeof(vmcval));
539
540 if (kern_feature_override(KF_COMPRSV_OVRD)) {
541 vmcval = 0;
542 }
543 if (vmcval == 0) {
544#if POPCOUNT_THE_COMPRESSED_DATA
545 popcount_c_segs = FALSE;
546#endif
547#if CHECKSUM_THE_DATA || CHECKSUM_THE_COMPRESSED_DATA
548 checksum_c_segs = FALSE;
549#endif
550 write_protect_c_segs = FALSE;
551 }
552#endif /* DEVELOPMENT || DEBUG */
553
554 /*
555 * ensure that any pointer that gets created from
556 * the vm_page zone can be packed properly
557 */
558 cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_min_address);
559
560 if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_min_address)
561 panic("C_SLOT_UNPACK_PTR failed on zone_map_min_address - %p", (void *)zone_map_min_address);
562
563 cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_max_address);
564
565 if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_max_address)
566 panic("C_SLOT_UNPACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address);
567
568
569 assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE);
570
571 PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof (vm_compression_limit));
572
573#ifdef CONFIG_EMBEDDED
574 vm_compressor_minorcompact_threshold_divisor = 20;
575 vm_compressor_majorcompact_threshold_divisor = 30;
576 vm_compressor_unthrottle_threshold_divisor = 40;
577 vm_compressor_catchup_threshold_divisor = 60;
578#else
579 if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) {
580 vm_compressor_minorcompact_threshold_divisor = 11;
581 vm_compressor_majorcompact_threshold_divisor = 13;
582 vm_compressor_unthrottle_threshold_divisor = 20;
583 vm_compressor_catchup_threshold_divisor = 35;
584 } else {
585 vm_compressor_minorcompact_threshold_divisor = 20;
586 vm_compressor_majorcompact_threshold_divisor = 25;
587 vm_compressor_unthrottle_threshold_divisor = 35;
588 vm_compressor_catchup_threshold_divisor = 50;
589 }
590#endif
591 /*
592 * vm_page_init_lck_grp is now responsible for calling vm_compressor_init_locks
593 * c_master_lock needs to be available early so that "vm_page_find_contiguous" can
594 * use PAGE_REPLACEMENT_ALLOWED to coordinate with the compressor.
595 */
596
597 c_list_lock = lck_mtx_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr);
598
599 queue_init(&c_bad_list_head);
600 queue_init(&c_age_list_head);
601 queue_init(&c_minor_list_head);
602 queue_init(&c_major_list_head);
603 queue_init(&c_filling_list_head);
604 queue_init(&c_swapout_list_head);
605 queue_init(&c_swappedin_list_head);
606 queue_init(&c_swappedout_list_head);
607 queue_init(&c_swappedout_sparse_list_head);
608
609 c_free_segno_head = -1;
610 c_segments_available = 0;
611
612 if (vm_compression_limit)
613 compressor_pool_size = (uint64_t)vm_compression_limit * PAGE_SIZE_64;
614
615 compressor_pool_max_size = C_SEG_MAX_LIMIT;
616 compressor_pool_max_size *= C_SEG_BUFSIZE;
617
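	/*
	 * platform-specific sizing: on x86_64 the default pool scales with
	 * max_mem (16x on systems with <= 4GB, stepping down to 2x above
	 * 32GB); on arm it is carved out of the remaining kernel VA space and
	 * capped at MAX_COMPRESSOR_POOL_SIZE; otherwise it defaults to
	 * max_mem.  An explicit vm_compression_limit boot-arg (handled above)
	 * overrides these defaults.
	 */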
618#if defined(__x86_64__)
619
620 if (vm_compression_limit == 0) {
621
622 if (max_mem <= (4ULL * 1024ULL * 1024ULL * 1024ULL))
623 compressor_pool_size = 16ULL * max_mem;
624 else if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL))
625 compressor_pool_size = 8ULL * max_mem;
626 else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL))
627 compressor_pool_size = 4ULL * max_mem;
628 else
629 compressor_pool_size = 2ULL * max_mem;
630 }
631 if (max_mem <= (8ULL * 1024ULL * 1024ULL * 1024ULL))
632 compressor_pool_multiplier = 1;
633 else if (max_mem <= (32ULL * 1024ULL * 1024ULL * 1024ULL))
634 compressor_pool_multiplier = 2;
635 else
636 compressor_pool_multiplier = 4;
637
638#elif defined(__arm__)
639
640#define VM_RESERVE_SIZE (1024 * 1024 * 256)
641#define MAX_COMPRESSOR_POOL_SIZE (1024 * 1024 * 450)
642
643 if (compressor_pool_max_size > MAX_COMPRESSOR_POOL_SIZE)
644 compressor_pool_max_size = MAX_COMPRESSOR_POOL_SIZE;
645
646 if (vm_compression_limit == 0)
647 compressor_pool_size = ((kernel_map->max_offset - kernel_map->min_offset) - kernel_map->size) - VM_RESERVE_SIZE;
648 compressor_pool_multiplier = 1;
649#else
650 if (compressor_pool_max_size > max_mem)
651 compressor_pool_max_size = max_mem;
652
653 if (vm_compression_limit == 0)
654 compressor_pool_size = max_mem;
655 compressor_pool_multiplier = 1;
656#endif
657 if (compressor_pool_size > compressor_pool_max_size)
658 compressor_pool_size = compressor_pool_max_size;
659
660try_again:
661 c_segments_limit = (uint32_t)(compressor_pool_size / (vm_size_t)(C_SEG_ALLOCSIZE));
662 c_segments_nearing_limit = (uint32_t)(((uint64_t)c_segments_limit * 98ULL) / 100ULL);
663
664 c_segment_pages_compressed_limit = (c_segments_limit * (C_SEG_BUFSIZE / PAGE_SIZE) * compressor_pool_multiplier);
665
666 if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE))
667 c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE);
668
669 c_segment_pages_compressed_nearing_limit = (uint32_t)(((uint64_t)c_segment_pages_compressed_limit * 98ULL) / 100ULL);
670
671 /*
672 * Submap needs space for:
673 * - c_segments
674 * - c_buffers
 675 * - swap reclamations -- C_SEG_BUFSIZE
676 */
677 c_segments_arr_size = vm_map_round_page((sizeof(union c_segu) * c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
678 c_buffers_size = vm_map_round_page(((vm_size_t)C_SEG_ALLOCSIZE * (vm_size_t)c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
679
680 compressor_submap_size = c_segments_arr_size + c_buffers_size + C_SEG_BUFSIZE;
681
682#if RECORD_THE_COMPRESSED_DATA
683 c_compressed_record_sbuf_size = (vm_size_t)C_SEG_ALLOCSIZE + (PAGE_SIZE * 2);
684 compressor_submap_size += c_compressed_record_sbuf_size;
685#endif /* RECORD_THE_COMPRESSED_DATA */
686
687 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
688 vmk_flags.vmkf_permanent = TRUE;
689 retval = kmem_suballoc(kernel_map, &start_addr, compressor_submap_size,
690 FALSE, VM_FLAGS_ANYWHERE, vmk_flags, VM_KERN_MEMORY_COMPRESSOR,
691 &compressor_map);
692
693 if (retval != KERN_SUCCESS) {
694 if (++attempts > 3)
695 panic("vm_compressor_init: kmem_suballoc failed - 0x%llx", (uint64_t)compressor_submap_size);
696
697 compressor_pool_size = compressor_pool_size / 2;
698
699 kprintf("retrying creation of the compressor submap at 0x%llx bytes\n", compressor_pool_size);
700 goto try_again;
701 }
702 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
703 panic("vm_compressor_init: kernel_memory_allocate failed - c_segments\n");
704 if (kernel_memory_allocate(compressor_map, &c_buffers, c_buffers_size, 0, KMA_COMPRESSOR | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
705 panic("vm_compressor_init: kernel_memory_allocate failed - c_buffers\n");
706
707
708 c_segment_min_size = sizeof(struct c_segment) + (C_SEG_SLOT_VAR_ARRAY_MIN_LEN * sizeof(struct c_slot));
709
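	/*
	 * round the per-segment allocation up to the next power of two
	 * (starting at 128 bytes); the slack between struct c_segment and the
	 * padded size is then used as a small inline array of c_slots
	 * (c_seg_fixed_array_len)
	 */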
710 for (c_segment_padded_size = 128; c_segment_padded_size < c_segment_min_size; c_segment_padded_size = c_segment_padded_size << 1);
711
712 compressor_segment_zone = zinit(c_segment_padded_size, c_segments_limit * c_segment_padded_size, PAGE_SIZE, "compressor_segment");
713 zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE);
714 zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE);
715
716 c_seg_fixed_array_len = (c_segment_padded_size - sizeof(struct c_segment)) / sizeof(struct c_slot);
717
718 c_segments_busy = FALSE;
719
720 c_segments_next_page = (caddr_t)c_segments;
721 vm_compressor_algorithm_init();
722
723 {
724 host_basic_info_data_t hinfo;
725 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
726
727#define BSD_HOST 1
728 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
729
730 compressor_cpus = hinfo.max_cpus;
731 compressor_scratch_bufs = kalloc_tag(compressor_cpus * vm_compressor_get_decode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
732
733 kdp_compressor_scratch_buf = kalloc_tag(vm_compressor_get_decode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
734 kdp_compressor_decompressed_page = kalloc_tag(PAGE_SIZE, VM_KERN_MEMORY_COMPRESSOR);
735 kdp_compressor_decompressed_page_paddr = kvtophys((vm_offset_t)kdp_compressor_decompressed_page);
736 kdp_compressor_decompressed_page_ppnum = (ppnum_t) atop(kdp_compressor_decompressed_page_paddr);
737 }
738#if CONFIG_FREEZE
739 freezer_compressor_scratch_buf = kalloc_tag(vm_compressor_get_encode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
740#endif
741
742#if RECORD_THE_COMPRESSED_DATA
743 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)&c_compressed_record_sbuf, c_compressed_record_sbuf_size, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
744 panic("vm_compressor_init: kernel_memory_allocate failed - c_compressed_record_sbuf\n");
745
746 c_compressed_record_cptr = c_compressed_record_sbuf;
747 c_compressed_record_ebuf = c_compressed_record_sbuf + c_compressed_record_sbuf_size;
748#endif
749
750 if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
751 BASEPRI_VM, &thread) != KERN_SUCCESS) {
752 panic("vm_compressor_swap_trigger_thread: create failed");
753 }
754 thread_deallocate(thread);
755
756 if (vm_pageout_internal_start() != KERN_SUCCESS) {
757 panic("vm_compressor_init: Failed to start the internal pageout thread.\n");
758 }
759 if (VM_CONFIG_SWAP_IS_PRESENT)
760 vm_compressor_swap_init();
761
762 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE)
763 vm_compressor_is_active = 1;
764
765#if CONFIG_FREEZE
766 memorystatus_freeze_enabled = TRUE;
767#endif /* CONFIG_FREEZE */
768
769 vm_compressor_available = 1;
770
771 vm_page_reactivate_all_throttled();
772}
773
774
775#if VALIDATE_C_SEGMENTS
776
777static void
778c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact)
779{
780 int c_indx;
781 int32_t bytes_used;
782 uint32_t c_rounded_size;
783 uint32_t c_size;
784 c_slot_t cs;
785
786 if (c_seg->c_firstemptyslot < c_seg->c_nextslot) {
787 c_indx = c_seg->c_firstemptyslot;
788 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
789
790 if (cs == NULL)
791 panic("c_seg_validate: no slot backing c_firstemptyslot");
792
793 if (cs->c_size)
794 panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size);
795 }
796 bytes_used = 0;
797
798 for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) {
799
800 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
801
802 c_size = UNPACK_C_SIZE(cs);
803
804 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
805
806 bytes_used += c_rounded_size;
807
808#if CHECKSUM_THE_COMPRESSED_DATA
809 unsigned csvhash;
810 if (c_size && cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
811 addr64_t csvphys = kvtophys((vm_offset_t)&c_seg->c_store.c_buffer[cs->c_offset]);
812 panic("Compressed data doesn't match original %p phys: 0x%llx %d %p %d %d 0x%x 0x%x", c_seg, csvphys, cs->c_offset, cs, c_indx, c_size, cs->c_hash_compressed_data, csvhash);
813 }
814#endif
815 }
816
817 if (bytes_used != c_seg->c_bytes_used)
818 panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used);
819
820 if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
821 panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
822 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
823
824 if (must_be_compact) {
825 if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
826 panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
827 (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
828 }
829}
830
831#endif
832
833
834void
835c_seg_need_delayed_compaction(c_segment_t c_seg, boolean_t c_list_lock_held)
836{
837 boolean_t clear_busy = FALSE;
838
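	/*
	 * lock ordering is c_list_lock, then the segment lock... if we can't
	 * take the list lock without blocking, mark the segment busy, drop
	 * its lock and re-take both in the proper order
	 */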
839 if (c_list_lock_held == FALSE) {
840 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
841 C_SEG_BUSY(c_seg);
842
843 lck_mtx_unlock_always(&c_seg->c_lock);
844 lck_mtx_lock_spin_always(c_list_lock);
845 lck_mtx_lock_spin_always(&c_seg->c_lock);
846
847 clear_busy = TRUE;
848 }
849 }
850 assert(c_seg->c_state != C_IS_FILLING);
851
852 if (!c_seg->c_on_minorcompact_q && !(C_SEG_IS_ONDISK(c_seg))) {
853 queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list);
854 c_seg->c_on_minorcompact_q = 1;
855 c_minor_count++;
856 }
857 if (c_list_lock_held == FALSE)
858 lck_mtx_unlock_always(c_list_lock);
859
860 if (clear_busy == TRUE)
861 C_SEG_WAKEUP_DONE(c_seg);
862}
863
864
865unsigned int c_seg_moved_to_sparse_list = 0;
866
867void
868c_seg_move_to_sparse_list(c_segment_t c_seg)
869{
870 boolean_t clear_busy = FALSE;
871
872 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
873 C_SEG_BUSY(c_seg);
874
875 lck_mtx_unlock_always(&c_seg->c_lock);
876 lck_mtx_lock_spin_always(c_list_lock);
877 lck_mtx_lock_spin_always(&c_seg->c_lock);
878
879 clear_busy = TRUE;
880 }
881 c_seg_switch_state(c_seg, C_ON_SWAPPEDOUTSPARSE_Q, FALSE);
882
883 c_seg_moved_to_sparse_list++;
884
885 lck_mtx_unlock_always(c_list_lock);
886
887 if (clear_busy == TRUE)
888 C_SEG_WAKEUP_DONE(c_seg);
889}
890
891
892void
893c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg)
894{
895 c_segment_t c_seg_next;
896
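	/*
	 * keep the queue ordered by ascending c_generation_id... insert ahead
	 * of the first segment with a newer generation id, or at the tail if
	 * this segment is the newest
	 */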
897 if (queue_empty(qhead)) {
898 queue_enter(qhead, c_seg, c_segment_t, c_age_list);
899 } else {
900 c_seg_next = (c_segment_t)queue_first(qhead);
901
902 while (TRUE) {
903
904 if (c_seg->c_generation_id < c_seg_next->c_generation_id) {
905 queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list);
906 break;
907 }
908 c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list);
909
910 if (queue_end(qhead, (queue_entry_t) c_seg_next)) {
911 queue_enter(qhead, c_seg, c_segment_t, c_age_list);
912 break;
913 }
914 }
915 }
916}
917
918
919int try_minor_compaction_failed = 0;
920int try_minor_compaction_succeeded = 0;
921
922void
923c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg)
924{
925
926 assert(c_seg->c_on_minorcompact_q);
927 /*
928 * c_seg is currently on the delayed minor compaction
929 * queue and we have c_seg locked... if we can get the
930 * c_list_lock w/o blocking (if we blocked we could deadlock
931 * because the lock order is c_list_lock then c_seg's lock)
932 * we'll pull it from the delayed list and free it directly
933 */
934 if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
935 /*
936 * c_list_lock is held, we need to bail
937 */
938 try_minor_compaction_failed++;
939
940 lck_mtx_unlock_always(&c_seg->c_lock);
941 } else {
942 try_minor_compaction_succeeded++;
943
944 C_SEG_BUSY(c_seg);
945 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE);
946 }
947}
948
949
950int
951c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement)
952{
953 int c_seg_freed;
954
955 assert(c_seg->c_busy);
956
957 /*
958 * check for the case that can occur when we are not swapping
959 * and this segment has been major compacted in the past
960 * and moved to the majorcompact q to remove it from further
961 * consideration... if the occupancy falls too low we need
962 * to put it back on the age_q so that it will be considered
963 * in the next major compaction sweep... if we don't do this
964 * we will eventually run into the c_segments_limit
965 */
966 if (c_seg->c_state == C_ON_MAJORCOMPACT_Q && C_SEG_SHOULD_MAJORCOMPACT_NOW(c_seg)) {
967
968 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
969 }
970 if (!c_seg->c_on_minorcompact_q) {
971 if (clear_busy == TRUE)
972 C_SEG_WAKEUP_DONE(c_seg);
973
974 lck_mtx_unlock_always(&c_seg->c_lock);
975
976 return (0);
977 }
978 queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
979 c_seg->c_on_minorcompact_q = 0;
980 c_minor_count--;
981
982 lck_mtx_unlock_always(c_list_lock);
983
984 if (disallow_page_replacement == TRUE) {
985 lck_mtx_unlock_always(&c_seg->c_lock);
986
987 PAGE_REPLACEMENT_DISALLOWED(TRUE);
988
989 lck_mtx_lock_spin_always(&c_seg->c_lock);
990 }
991 c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy);
992
993 if (disallow_page_replacement == TRUE)
994 PAGE_REPLACEMENT_DISALLOWED(FALSE);
995
996 if (need_list_lock == TRUE)
997 lck_mtx_lock_spin_always(c_list_lock);
998
999 return (c_seg_freed);
1000}
1001
1002
1003void
1004c_seg_wait_on_busy(c_segment_t c_seg)
1005{
1006 c_seg->c_wanted = 1;
1007 assert_wait((event_t) (c_seg), THREAD_UNINT);
1008
1009 lck_mtx_unlock_always(&c_seg->c_lock);
1010 thread_block(THREAD_CONTINUE_NULL);
1011}
1012
1013
1014void
1015c_seg_switch_state(c_segment_t c_seg, int new_state, boolean_t insert_head)
1016{
1017 int old_state = c_seg->c_state;
1018
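	/*
	 * a state change removes the segment from its current queue and
	 * decrements that queue's count, then inserts it on the new state's
	 * queue (head or tail, per insert_head) and bumps the new count...
	 * both switch statements also assert that the transition is legal
	 */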
1019#if __i386__ || __x86_64__
1020 if (new_state != C_IS_FILLING)
1021 LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
1022 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
1023#endif
1024 switch (old_state) {
1025
1026 case C_IS_EMPTY:
1027 assert(new_state == C_IS_FILLING || new_state == C_IS_FREE);
1028
1029 c_empty_count--;
1030 break;
1031
1032 case C_IS_FILLING:
1033 assert(new_state == C_ON_AGE_Q || new_state == C_ON_SWAPOUT_Q);
1034
1035 queue_remove(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
1036 c_filling_count--;
1037 break;
1038
1039 case C_ON_AGE_Q:
1040 assert(new_state == C_ON_SWAPOUT_Q || new_state == C_ON_MAJORCOMPACT_Q ||
1041 new_state == C_IS_FREE);
1042
1043 queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1044 c_age_count--;
1045 break;
1046
1047 case C_ON_SWAPPEDIN_Q:
1048 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
1049
1050 queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1051 c_swappedin_count--;
1052 break;
1053
1054 case C_ON_SWAPOUT_Q:
1055 assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
1056 new_state == C_ON_AGE_Q || new_state == C_IS_FREE || new_state == C_IS_EMPTY);
1057
1058 queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
1059 thread_wakeup((event_t)&compaction_swapper_running);
1060 c_swapout_count--;
1061 break;
1062
1063 case C_ON_SWAPPEDOUT_Q:
1064 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
1065 new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
1066 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
1067
1068 queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1069 c_swappedout_count--;
1070 break;
1071
1072 case C_ON_SWAPPEDOUTSPARSE_Q:
1073 assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
1074 new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);
1075
1076 queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1077 c_swappedout_sparse_count--;
1078 break;
1079
1080 case C_ON_MAJORCOMPACT_Q:
1081 assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);
1082
1083 queue_remove(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1084 c_major_count--;
1085 break;
1086
1087 case C_ON_BAD_Q:
1088 assert(new_state == C_IS_FREE);
1089
1090 queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1091 c_bad_count--;
1092 break;
1093
1094 default:
1095 panic("c_seg %p has bad c_state = %d\n", c_seg, old_state);
1096 }
1097
1098 switch(new_state) {
1099 case C_IS_FREE:
1100 assert(old_state != C_IS_FILLING);
1101
1102 break;
1103
1104 case C_IS_EMPTY:
1105 assert(old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1106
1107 c_empty_count++;
1108 break;
1109
1110 case C_IS_FILLING:
1111 assert(old_state == C_IS_EMPTY);
1112
1113 queue_enter(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
1114 c_filling_count++;
1115 break;
1116
1117 case C_ON_AGE_Q:
1118 assert(old_state == C_IS_FILLING || old_state == C_ON_SWAPPEDIN_Q || old_state == C_ON_SWAPOUT_Q ||
1119 old_state == C_ON_MAJORCOMPACT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);
1120
1121 if (old_state == C_IS_FILLING)
1122 queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1123 else {
1124 if (!queue_empty(&c_age_list_head)) {
1125 c_segment_t c_first;
1126
1127 c_first = (c_segment_t)queue_first(&c_age_list_head);
1128 c_seg->c_creation_ts = c_first->c_creation_ts;
1129 }
1130 queue_enter_first(&c_age_list_head, c_seg, c_segment_t, c_age_list);
1131 }
1132 c_age_count++;
1133 break;
1134
1135 case C_ON_SWAPPEDIN_Q:
1136 assert(c_seg->c_state == C_ON_SWAPPEDOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
1137
1138 if (insert_head == TRUE)
1139 queue_enter_first(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1140 else
1141 queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
1142 c_swappedin_count++;
1143 break;
1144
1145 case C_ON_SWAPOUT_Q:
1146 assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING);
1147
1148 if (insert_head == TRUE)
1149 queue_enter_first(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
1150 else
1151 queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
1152 c_swapout_count++;
1153 break;
1154
1155 case C_ON_SWAPPEDOUT_Q:
1156 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1157
1158 if (insert_head == TRUE)
1159 queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1160 else
1161 queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
1162 c_swappedout_count++;
1163 break;
1164
1165 case C_ON_SWAPPEDOUTSPARSE_Q:
1166 assert(c_seg->c_state == C_ON_SWAPOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUT_Q);
1167
1168 if (insert_head == TRUE)
1169 queue_enter_first(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1170 else
1171 queue_enter(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
1172
1173 c_swappedout_sparse_count++;
1174 break;
1175
1176 case C_ON_MAJORCOMPACT_Q:
1177 assert(c_seg->c_state == C_ON_AGE_Q);
1178
1179 if (insert_head == TRUE)
1180 queue_enter_first(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1181 else
1182 queue_enter(&c_major_list_head, c_seg, c_segment_t, c_age_list);
1183 c_major_count++;
1184 break;
1185
1186 case C_ON_BAD_Q:
1187 assert(c_seg->c_state == C_ON_SWAPPEDOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
1188
1189 if (insert_head == TRUE)
1190 queue_enter_first(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1191 else
1192 queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
1193 c_bad_count++;
1194 break;
1195
1196 default:
1197 panic("c_seg %p requesting bad c_state = %d\n", c_seg, new_state);
1198 }
1199 c_seg->c_state = new_state;
1200}
1201
1202
1203
1204void
1205c_seg_free(c_segment_t c_seg)
1206{
1207 assert(c_seg->c_busy);
1208
1209 lck_mtx_unlock_always(&c_seg->c_lock);
1210 lck_mtx_lock_spin_always(c_list_lock);
1211 lck_mtx_lock_spin_always(&c_seg->c_lock);
1212
1213 c_seg_free_locked(c_seg);
1214}
1215
1216
1217void
1218c_seg_free_locked(c_segment_t c_seg)
1219{
1220 int segno;
1221 int pages_populated = 0;
1222 int32_t *c_buffer = NULL;
1223 uint64_t c_swap_handle = 0;
1224
1225 assert(c_seg->c_busy);
1226 assert(c_seg->c_slots_used == 0);
1227 assert(!c_seg->c_on_minorcompact_q);
1228 assert(!c_seg->c_busy_swapping);
1229
1230 if (c_seg->c_overage_swap == TRUE) {
1231 c_overage_swapped_count--;
1232 c_seg->c_overage_swap = FALSE;
1233 }
1234 if ( !(C_SEG_IS_ONDISK(c_seg)))
1235 c_buffer = c_seg->c_store.c_buffer;
1236 else
1237 c_swap_handle = c_seg->c_store.c_swap_handle;
1238
1239 c_seg_switch_state(c_seg, C_IS_FREE, FALSE);
1240
1241 lck_mtx_unlock_always(c_list_lock);
1242
1243 if (c_buffer) {
1244 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
1245 c_seg->c_store.c_buffer = NULL;
1246 } else
1247 c_seg->c_store.c_swap_handle = (uint64_t)-1;
1248
1249 lck_mtx_unlock_always(&c_seg->c_lock);
1250
1251 if (c_buffer) {
1252 if (pages_populated)
1253 kernel_memory_depopulate(compressor_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);
1254
1255 } else if (c_swap_handle) {
1256 /*
1257 * Free swap space on disk.
1258 */
1259 vm_swap_free(c_swap_handle);
1260 }
1261 lck_mtx_lock_spin_always(&c_seg->c_lock);
1262 /*
1263 * c_seg must remain busy until
1264 * after the call to vm_swap_free
1265 */
1266 C_SEG_WAKEUP_DONE(c_seg);
1267 lck_mtx_unlock_always(&c_seg->c_lock);
1268
1269 segno = c_seg->c_mysegno;
1270
1271 lck_mtx_lock_spin_always(c_list_lock);
1272 /*
1273 * because the c_buffer is now associated with the segno,
1274 * we can't put the segno back on the free list until
1275 * after we have depopulated the c_buffer range, or
1276 * we run the risk of depopulating a range that is
1277 * now being used in one of the compressor heads
1278 */
1279 c_segments[segno].c_segno = c_free_segno_head;
1280 c_free_segno_head = segno;
1281 c_segment_count--;
1282
1283 lck_mtx_unlock_always(c_list_lock);
1284
1285 lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);
1286
1287 if (c_seg->c_slot_var_array_len)
1288 kfree(c_seg->c_slot_var_array, sizeof(struct c_slot) * c_seg->c_slot_var_array_len);
1289
1290 zfree(compressor_segment_zone, c_seg);
1291}
1292
1293#if DEVELOPMENT || DEBUG
1294int c_seg_trim_page_count = 0;
1295#endif
1296
1297void
1298c_seg_trim_tail(c_segment_t c_seg)
1299{
1300 c_slot_t cs;
1301 uint32_t c_size;
1302 uint32_t c_offset;
1303 uint32_t c_rounded_size;
1304 uint16_t current_nextslot;
1305 uint32_t current_populated_offset;
1306
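	/*
	 * walk backwards from the last slot, dropping any trailing empty
	 * slots... once a live slot is found, pull c_nextoffset and
	 * c_populated_offset back so only the space that still holds data
	 * remains accounted for
	 */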
1307 if (c_seg->c_bytes_used == 0)
1308 return;
1309 current_nextslot = c_seg->c_nextslot;
1310 current_populated_offset = c_seg->c_populated_offset;
1311
1312 while (c_seg->c_nextslot) {
1313
1314 cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1));
1315
1316 c_size = UNPACK_C_SIZE(cs);
1317
1318 if (c_size) {
1319 if (current_nextslot != c_seg->c_nextslot) {
1320 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1321 c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1322
1323 c_seg->c_nextoffset = c_offset;
1324 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) &
1325 ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
1326
1327 if (c_seg->c_firstemptyslot > c_seg->c_nextslot)
1328 c_seg->c_firstemptyslot = c_seg->c_nextslot;
1329#if DEVELOPMENT || DEBUG
1330 c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) -
1331 round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) /
1332 PAGE_SIZE);
1333#endif
1334 }
1335 break;
1336 }
1337 c_seg->c_nextslot--;
1338 }
1339 assert(c_seg->c_nextslot);
1340}
1341
1342
1343int
1344c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy)
1345{
1346 c_slot_mapping_t slot_ptr;
1347 uint32_t c_offset = 0;
1348 uint32_t old_populated_offset;
1349 uint32_t c_rounded_size;
1350 uint32_t c_size;
1351 int c_indx = 0;
1352 int i;
1353 c_slot_t c_dst;
1354 c_slot_t c_src;
1355
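	/*
	 * minor compaction slides the remaining live slot data toward the
	 * front of the segment buffer to close the holes left by freed slots,
	 * repoints each moved slot's mapping at its new index, and then
	 * depopulates any pages freed up at the tail of the buffer
	 */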
1356 assert(c_seg->c_busy);
1357
1358#if VALIDATE_C_SEGMENTS
1359 c_seg_validate(c_seg, FALSE);
1360#endif
1361 if (c_seg->c_bytes_used == 0) {
1362 c_seg_free(c_seg);
1363 return (1);
1364 }
1365 lck_mtx_unlock_always(&c_seg->c_lock);
1366
1367 if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE)
1368 goto done;
1369
1370/* TODO: assert first emptyslot's c_size is actually 0 */
1371
1372#if DEVELOPMENT || DEBUG
1373 C_SEG_MAKE_WRITEABLE(c_seg);
1374#endif
1375
1376#if VALIDATE_C_SEGMENTS
1377 c_seg->c_was_minor_compacted++;
1378#endif
1379 c_indx = c_seg->c_firstemptyslot;
1380 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1381
1382 old_populated_offset = c_seg->c_populated_offset;
1383 c_offset = c_dst->c_offset;
1384
1385 for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) {
1386
1387 c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i);
1388
1389 c_size = UNPACK_C_SIZE(c_src);
1390
1391 if (c_size == 0)
1392 continue;
1393
1394 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1395/* N.B.: This memcpy may be an overlapping copy */
1396 memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_rounded_size);
1397
1398 cslot_copy(c_dst, c_src);
1399 c_dst->c_offset = c_offset;
1400
1401 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
1402 slot_ptr->s_cindx = c_indx;
1403
1404 c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1405 PACK_C_SIZE(c_src, 0);
1406 c_indx++;
1407
1408 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
1409 }
1410 c_seg->c_firstemptyslot = c_indx;
1411 c_seg->c_nextslot = c_indx;
1412 c_seg->c_nextoffset = c_offset;
1413 c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
1414 c_seg->c_bytes_unused = 0;
1415
1416#if VALIDATE_C_SEGMENTS
1417 c_seg_validate(c_seg, TRUE);
1418#endif
1419 if (old_populated_offset > c_seg->c_populated_offset) {
1420 uint32_t gc_size;
1421 int32_t *gc_ptr;
1422
1423 gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset);
1424 gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset];
1425
1426 kernel_memory_depopulate(compressor_map, (vm_offset_t)gc_ptr, gc_size, KMA_COMPRESSOR);
1427 }
1428
1429#if DEVELOPMENT || DEBUG
1430 C_SEG_WRITE_PROTECT(c_seg);
1431#endif
1432
1433done:
1434 if (clear_busy == TRUE) {
1435 lck_mtx_lock_spin_always(&c_seg->c_lock);
1436 C_SEG_WAKEUP_DONE(c_seg);
1437 lck_mtx_unlock_always(&c_seg->c_lock);
1438 }
1439 return (0);
1440}
1441
1442
1443static void
1444c_seg_alloc_nextslot(c_segment_t c_seg)
1445{
1446 struct c_slot *old_slot_array = NULL;
1447 struct c_slot *new_slot_array = NULL;
1448 int newlen;
1449 int oldlen;
1450
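	/*
	 * slots beyond the inline fixed array live in a variable-length array
	 * that is grown by doubling (starting at C_SEG_SLOT_VAR_ARRAY_MIN_LEN)...
	 * the copy into the new array is done under the segment lock and the
	 * old array is freed only after the lock is dropped
	 */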
1451 if (c_seg->c_nextslot < c_seg_fixed_array_len)
1452 return;
1453
1454 if ((c_seg->c_nextslot - c_seg_fixed_array_len) >= c_seg->c_slot_var_array_len) {
1455
1456 oldlen = c_seg->c_slot_var_array_len;
1457 old_slot_array = c_seg->c_slot_var_array;
1458
1459 if (oldlen == 0)
1460 newlen = C_SEG_SLOT_VAR_ARRAY_MIN_LEN;
1461 else
1462 newlen = oldlen * 2;
1463
1464 new_slot_array = (struct c_slot *)kalloc(sizeof(struct c_slot) * newlen);
1465
1466 lck_mtx_lock_spin_always(&c_seg->c_lock);
1467
1468 if (old_slot_array)
1469 memcpy((char *)new_slot_array, (char *)old_slot_array, sizeof(struct c_slot) * oldlen);
1470
1471 c_seg->c_slot_var_array_len = newlen;
1472 c_seg->c_slot_var_array = new_slot_array;
1473
1474 lck_mtx_unlock_always(&c_seg->c_lock);
1475
1476 if (old_slot_array)
1477 kfree(old_slot_array, sizeof(struct c_slot) * oldlen);
1478 }
1479}
1480
1481
1482
1483struct {
1484 uint64_t asked_permission;
1485 uint64_t compactions;
1486 uint64_t moved_slots;
1487 uint64_t moved_bytes;
1488 uint64_t wasted_space_in_swapouts;
1489 uint64_t count_of_swapouts;
1490 uint64_t count_of_freed_segs;
1491} c_seg_major_compact_stats;
1492
1493
1494#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE ((C_SEG_BUFSIZE * 90) / 100)
1495
1496
1497boolean_t
1498c_seg_major_compact_ok(
1499 c_segment_t c_seg_dst,
1500 c_segment_t c_seg_src)
1501{
1502
1503 c_seg_major_compact_stats.asked_permission++;
1504
1505 if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE &&
1506 c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE)
1507 return (FALSE);
1508
1509 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
1510 /*
1511 * destination segment is full... can't compact
1512 */
1513 return (FALSE);
1514 }
1515
1516 return (TRUE);
1517}
1518
1519
1520boolean_t
1521c_seg_major_compact(
1522 c_segment_t c_seg_dst,
1523 c_segment_t c_seg_src)
1524{
1525 c_slot_mapping_t slot_ptr;
1526 uint32_t c_rounded_size;
1527 uint32_t c_size;
1528 uint16_t dst_slot;
1529 int i;
1530 c_slot_t c_dst;
1531 c_slot_t c_src;
1532 boolean_t keep_compacting = TRUE;
1533
1534 /*
1535 * segments are not locked but they are both marked c_busy
1536 * which keeps c_decompress from working on them...
1537 * we can safely allocate new pages, move compressed data
1538 * from c_seg_src to c_seg_dst and update both c_segment's
1539 * state w/o holding the master lock
1540 */
1541#if DEVELOPMENT || DEBUG
1542 C_SEG_MAKE_WRITEABLE(c_seg_dst);
1543#endif
1544
1545#if VALIDATE_C_SEGMENTS
1546 c_seg_dst->c_was_major_compacted++;
1547 c_seg_src->c_was_major_donor++;
1548#endif
1549 c_seg_major_compact_stats.compactions++;
1550
1551 dst_slot = c_seg_dst->c_nextslot;
1552
1553 for (i = 0; i < c_seg_src->c_nextslot; i++) {
1554
1555 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i);
1556
1557 c_size = UNPACK_C_SIZE(c_src);
1558
1559 if (c_size == 0) {
1560 /* BATCH: move what we have so far; */
1561 continue;
1562 }
1563
1564 if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) {
1565 int size_to_populate;
1566
1567 /* doesn't fit */
1568 size_to_populate = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset);
1569
1570 if (size_to_populate == 0) {
1571 /* can't fit */
1572 keep_compacting = FALSE;
1573 break;
1574 }
1575 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
1576 size_to_populate = C_SEG_MAX_POPULATE_SIZE;
1577
1578 kernel_memory_populate(compressor_map,
1579 (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset],
1580 size_to_populate,
1581 KMA_COMPRESSOR,
1582 VM_KERN_MEMORY_COMPRESSOR);
1583
1584 c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
1585 assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE);
1586 }
1587 c_seg_alloc_nextslot(c_seg_dst);
1588
1589 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
1590
1591 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
1592
1593 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
1594
1595 c_seg_major_compact_stats.moved_slots++;
1596 c_seg_major_compact_stats.moved_bytes += c_size;
1597
1598 cslot_copy(c_dst, c_src);
1599 c_dst->c_offset = c_seg_dst->c_nextoffset;
1600
1601 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
1602 c_seg_dst->c_firstemptyslot++;
1603 c_seg_dst->c_slots_used++;
1604 c_seg_dst->c_nextslot++;
1605 c_seg_dst->c_bytes_used += c_rounded_size;
1606 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
1607
1608 PACK_C_SIZE(c_src, 0);
1609
1610 c_seg_src->c_bytes_used -= c_rounded_size;
1611 c_seg_src->c_bytes_unused += c_rounded_size;
1612 c_seg_src->c_firstemptyslot = 0;
1613
1614 assert(c_seg_src->c_slots_used);
1615 c_seg_src->c_slots_used--;
1616
1617 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
1618 /* dest segment is now full */
1619 keep_compacting = FALSE;
1620 break;
1621 }
1622 }
1623#if DEVELOPMENT || DEBUG
1624 C_SEG_WRITE_PROTECT(c_seg_dst);
1625#endif
1626 if (dst_slot < c_seg_dst->c_nextslot) {
1627
1628 PAGE_REPLACEMENT_ALLOWED(TRUE);
1629 /*
1630 * we've now locked out c_decompress from
1631 * converting the slot passed into it into
1632 * a c_segment_t which allows us to use
1633 * the backptr to change which c_segment and
1634 * index the slot points to
1635 */
1636 while (dst_slot < c_seg_dst->c_nextslot) {
1637
1638 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
1639
1640 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
1641 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
1642 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
1643 slot_ptr->s_cindx = dst_slot++;
1644 }
1645 PAGE_REPLACEMENT_ALLOWED(FALSE);
1646 }
1647 return (keep_compacting);
1648}
1649
1650
1651uint64_t
1652vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec)
1653{
1654 uint64_t end_msecs;
1655 uint64_t start_msecs;
1656
1657 end_msecs = (end_sec * 1000) + end_nsec / 1000000;
1658 start_msecs = (start_sec * 1000) + start_nsec / 1000000;
1659
1660 return (end_msecs - start_msecs);
1661}
1662
1663
1664
1665uint32_t compressor_eval_period_in_msecs = 250;
1666uint32_t compressor_sample_min_in_msecs = 500;
1667uint32_t compressor_sample_max_in_msecs = 10000;
1668uint32_t compressor_thrashing_threshold_per_10msecs = 50;
1669uint32_t compressor_thrashing_min_per_10msecs = 20;
1670
1671/* When true, reset sample data next chance we get. */
1672static boolean_t compressor_need_sample_reset = FALSE;
1673
1674extern uint32_t vm_page_filecache_min;
1675
1676
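/*
 * sample decompression activity over a window and, when the decompression
 * rate looks like thrashing, derive swapout_target_age: a creation-time
 * cutoff beyond which segments service only the coldest tail (roughly 1%)
 * of recent decompressions.  The age that covers roughly 95% of
 * decompressions approximates the working set; a target age is only
 * produced if that working set estimate still fits within the pages the
 * compressor currently holds.
 */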
1677void
1678compute_swapout_target_age(void)
1679{
1680 clock_sec_t cur_ts_sec;
1681 clock_nsec_t cur_ts_nsec;
1682 uint32_t min_operations_needed_in_this_sample;
1683 uint64_t elapsed_msecs_in_eval;
1684 uint64_t elapsed_msecs_in_sample;
1685 boolean_t need_eval_reset = FALSE;
1686
1687 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
1688
1689 elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec);
1690
1691 if (compressor_need_sample_reset ||
1692 elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) {
1693 compressor_need_sample_reset = TRUE;
1694 need_eval_reset = TRUE;
1695 goto done;
1696 }
1697 elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec);
1698
1699 if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs)
1700 goto done;
1701 need_eval_reset = TRUE;
1702
1703 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0);
1704
1705 min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10;
1706
1707 if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample ||
1708 (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) {
1709
1710 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count,
1711 sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0);
1712
1713 swapout_target_age = 0;
1714
1715 compressor_need_sample_reset = TRUE;
1716 need_eval_reset = TRUE;
1717 goto done;
1718 }
1719 last_eval_compression_count = sample_period_compression_count;
1720 last_eval_decompression_count = sample_period_decompression_count;
1721
1722 if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) {
1723
1724 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0);
1725 goto done;
1726 }
1727 if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) {
1728
1729 uint64_t running_total;
1730 uint64_t working_target;
1731 uint64_t aging_target;
1732 uint32_t oldest_age_of_csegs_sampled = 0;
1733 uint64_t working_set_approximation = 0;
1734
1735 swapout_target_age = 0;
1736
1737 working_target = (sample_period_decompression_count / 100) * 95; /* 95 percent */
1738 aging_target = (sample_period_decompression_count / 100) * 1; /* 1 percent */
1739 running_total = 0;
1740
1741 for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) {
1742
1743 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
1744
1745 working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
1746
1747 if (running_total >= working_target)
1748 break;
1749 }
1750 if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) {
1751
1752 working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample;
1753
1754 if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) {
1755
1756 running_total = overage_decompressions_during_sample_period;
1757
1758 for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) {
1759 running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];
1760
1761 if (running_total >= aging_target)
1762 break;
1763 }
1764 swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled;
1765
1766 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0);
1767 } else {
1768 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0);
1769 }
1770 } else
1771 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0);
1772
1773 compressor_need_sample_reset = TRUE;
1774 need_eval_reset = TRUE;
1775 } else
1776 KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0);
1777done:
1778 if (compressor_need_sample_reset == TRUE) {
1779 bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period));
1780 overage_decompressions_during_sample_period = 0;
1781
1782 start_of_sample_period_sec = cur_ts_sec;
1783 start_of_sample_period_nsec = cur_ts_nsec;
1784 sample_period_decompression_count = 0;
1785 sample_period_compression_count = 0;
1786 last_eval_decompression_count = 0;
1787 last_eval_compression_count = 0;
1788 compressor_need_sample_reset = FALSE;
1789 }
1790 if (need_eval_reset == TRUE) {
1791 start_of_eval_period_sec = cur_ts_sec;
1792 start_of_eval_period_nsec = cur_ts_nsec;
1793 }
1794}
1795
1796
1797int compaction_swapper_init_now = 0;
1798int compaction_swapper_running = 0;
1799int compaction_swapper_awakened = 0;
1800int compaction_swapper_abort = 0;
1801
1802
1803#if CONFIG_JETSAM
1804boolean_t memorystatus_kill_on_VM_thrashing(boolean_t);
1805boolean_t memorystatus_kill_on_FC_thrashing(boolean_t);
1806int compressor_thrashing_induced_jetsam = 0;
1807int filecache_thrashing_induced_jetsam = 0;
1808static boolean_t vm_compressor_thrashing_detected = FALSE;
1809#endif /* CONFIG_JETSAM */
1810
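/*
 * Decide whether the compaction/swap thread should push segments out to
 * swap.  Considers ripe-segment aging, the swap-active memory thresholds,
 * the thrashing heuristic (compute_swapout_target_age) and phantom-cache
 * pressure.  Under CONFIG_JETSAM, detected thrashing triggers a jetsam
 * kill instead of swapping.  Even when no swap is wanted, this returns
 * TRUE if a major compaction is needed; with swap disabled that only
 * drives the major compaction engine (see the comment at the end of the
 * function).
 */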
1811static boolean_t
1812compressor_needs_to_swap(void)
1813{
1814 boolean_t should_swap = FALSE;
1815
1816 if (vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit) {
1817 c_segment_t c_seg;
1818 clock_sec_t now;
1819 clock_sec_t age;
1820 clock_nsec_t nsec;
1821
1822 clock_get_system_nanotime(&now, &nsec);
1823 age = 0;
1824
1825 lck_mtx_lock_spin_always(c_list_lock);
1826
1827 if ( !queue_empty(&c_age_list_head)) {
1828 c_seg = (c_segment_t) queue_first(&c_age_list_head);
1829
1830 age = now - c_seg->c_creation_ts;
1831 }
1832 lck_mtx_unlock_always(c_list_lock);
1833
1834 if (age >= vm_ripe_target_age)
1835 return (TRUE);
1836 }
1837 if (VM_CONFIG_SWAP_IS_ACTIVE) {
1838 if (COMPRESSOR_NEEDS_TO_SWAP()) {
1839 return (TRUE);
1840 }
1841 if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) {
1842 return (TRUE);
1843 }
1844 if (vm_page_free_count < (vm_page_free_reserved - (COMPRESSOR_FREE_RESERVED_LIMIT * 2)))
1845 return (TRUE);
1846 }
1847 compute_swapout_target_age();
1848
1849 if (swapout_target_age) {
1850 c_segment_t c_seg;
1851
1852 lck_mtx_lock_spin_always(c_list_lock);
1853
1854 if (!queue_empty(&c_age_list_head)) {
1855
1856 c_seg = (c_segment_t) queue_first(&c_age_list_head);
1857
1858 if (c_seg->c_creation_ts > swapout_target_age)
1859 swapout_target_age = 0;
1860 }
1861 lck_mtx_unlock_always(c_list_lock);
1862 }
1863#if CONFIG_PHANTOM_CACHE
1864 if (vm_phantom_cache_check_pressure())
1865 should_swap = TRUE;
1866#endif
1867 if (swapout_target_age)
1868 should_swap = TRUE;
1869
1870#if CONFIG_JETSAM
1871 if (should_swap || vm_compressor_low_on_space() == TRUE) {
1872
1873 if (vm_compressor_thrashing_detected == FALSE) {
1874 vm_compressor_thrashing_detected = TRUE;
1875
1876 if (swapout_target_age || vm_compressor_low_on_space() == TRUE) {
1877 memorystatus_kill_on_VM_thrashing(TRUE /* async */);
1878 compressor_thrashing_induced_jetsam++;
1879 } else {
1880 memorystatus_kill_on_FC_thrashing(TRUE /* async */);
1881 filecache_thrashing_induced_jetsam++;
1882 }
1883 }
1884 /*
1885 * let the jetsam take precedence over
1886 * any major compactions we might have
1887 * been able to do... otherwise we run
1888 * the risk of doing major compactions
1889 * on segments we're about to free up
1890 * due to the jetsam activity.
1891 */
1892 should_swap = FALSE;
1893 }
1894
1895#endif /* CONFIG_JETSAM */
1896
1897 if (should_swap == FALSE) {
1898 /*
1899 * vm_compressor_needs_to_major_compact returns true only if we're
1900 * about to run out of available compressor segments... in this
1901 * case, we absolutely need to run a major compaction even if
1902 * we've just kicked off a jetsam or we don't otherwise need to
1903 * swap... terminating objects releases
1904 * pages back to the uncompressed cache, but does not guarantee
1905 * that we will free up even a single compression segment
1906 */
1907 should_swap = vm_compressor_needs_to_major_compact();
1908 }
1909
1910 /*
1911 * returning TRUE when swap_supported == FALSE
1912 * will cause the major compaction engine to
1913 * run, but will not trigger any swapping...
1914 * segments that have been major compacted
1915 * will be moved to the majorcompact queue
1916 */
1917 return (should_swap);
1918}
1919
1920#if CONFIG_JETSAM
1921/*
1922 * This function is called from the jetsam thread after killing something to
1923 * mitigate thrashing.
1924 *
1925 * We need to restart our thrashing detection heuristics since memory pressure
1926 * has potentially changed significantly, and we don't want to detect on old
1927 * data from before the jetsam.
1928 */
1929void
1930vm_thrashing_jetsam_done(void)
1931{
1932 vm_compressor_thrashing_detected = FALSE;
1933
1934 /* Were we compressor-thrashing or filecache-thrashing? */
1935 if (swapout_target_age) {
1936 swapout_target_age = 0;
1937 compressor_need_sample_reset = TRUE;
1938 }
1939#if CONFIG_PHANTOM_CACHE
1940 else {
1941 vm_phantom_cache_restart_sample();
1942 }
1943#endif
1944}
1945#endif /* CONFIG_JETSAM */
1946
1947uint32_t vm_wake_compactor_swapper_calls = 0;
1948uint32_t vm_run_compactor_already_running = 0;
1949uint32_t vm_run_compactor_empty_minor_q = 0;
1950uint32_t vm_run_compactor_did_compact = 0;
1951uint32_t vm_run_compactor_waited = 0;
1952
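/*
 * Run any pending delayed minor compactions inline.  If the compaction
 * swapper thread is already active we either return immediately or, when
 * restricted to a single processor, wait for it to finish instead of
 * competing with it.
 */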
1953void
1954vm_run_compactor(void)
1955{
1956 if (c_segment_count == 0)
1957 return;
1958
1959 lck_mtx_lock_spin_always(c_list_lock);
1960
1961 if (c_minor_count == 0) {
1962 vm_run_compactor_empty_minor_q++;
1963
1964 lck_mtx_unlock_always(c_list_lock);
1965 return;
1966 }
1967 if (compaction_swapper_running) {
1968
1969 if (vm_restricted_to_single_processor == FALSE) {
1970 vm_run_compactor_already_running++;
1971
1972 lck_mtx_unlock_always(c_list_lock);
1973 return;
1974 }
1975 vm_run_compactor_waited++;
1976
1977 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
1978
1979 lck_mtx_unlock_always(c_list_lock);
1980
1981 thread_block(THREAD_CONTINUE_NULL);
1982
1983 return;
1984 }
1985 vm_run_compactor_did_compact++;
1986
1987 fastwake_warmup = FALSE;
1988 compaction_swapper_running = 1;
1989
1990 vm_compressor_do_delayed_compactions(FALSE);
1991
1992 compaction_swapper_running = 0;
1993
1994 lck_mtx_unlock_always(c_list_lock);
1995
1996 thread_wakeup((event_t)&compaction_swapper_running);
1997}
1998
1999
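/*
 * Wake the compaction/swap trigger thread if there is minor compaction
 * work queued or a major compaction is needed, and the thread isn't
 * already running or awakened.
 */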
2000void
2001vm_wake_compactor_swapper(void)
2002{
2003 if (compaction_swapper_running || compaction_swapper_awakened || c_segment_count == 0)
2004 return;
2005
2006 if (c_minor_count || vm_compressor_needs_to_major_compact()) {
2007
2008 lck_mtx_lock_spin_always(c_list_lock);
2009
2010 fastwake_warmup = FALSE;
2011
2012 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2013
2014 vm_wake_compactor_swapper_calls++;
2015
2016 compaction_swapper_awakened = 1;
2017 thread_wakeup((event_t)&c_compressor_swap_trigger);
2018 }
2019 lck_mtx_unlock_always(c_list_lock);
2020 }
2021}
2022
2023
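/*
 * Force a compaction/swap pass with swap-behind of ripe segments enabled:
 * quiesce any running compaction pass, move sufficiently old segments from
 * the majorcompact queue back onto the age queue, then run
 * vm_compressor_compact_and_swap().  Requires swap to be configured.
 */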
2024void
2025vm_consider_swapping()
2026{
2027 c_segment_t c_seg, c_seg_next;
2028 clock_sec_t now;
2029 clock_nsec_t nsec;
2030
2031 assert(VM_CONFIG_SWAP_IS_PRESENT);
2032
2033 lck_mtx_lock_spin_always(c_list_lock);
2034
2035 compaction_swapper_abort = 1;
2036
2037 while (compaction_swapper_running) {
2038 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2039
2040 lck_mtx_unlock_always(c_list_lock);
2041
2042 thread_block(THREAD_CONTINUE_NULL);
2043
2044 lck_mtx_lock_spin_always(c_list_lock);
2045 }
2046 compaction_swapper_abort = 0;
2047 compaction_swapper_running = 1;
2048
2049 vm_swapout_ripe_segments = TRUE;
2050
2051 if (!queue_empty(&c_major_list_head)) {
2052
2053 clock_get_system_nanotime(&now, &nsec);
2054
2055 c_seg = (c_segment_t)queue_first(&c_major_list_head);
2056
2057 while (!queue_end(&c_major_list_head, (queue_entry_t)c_seg)) {
2058
2059 if (c_overage_swapped_count >= c_overage_swapped_limit)
2060 break;
2061
2062 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
2063
2064 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
2065
2066 lck_mtx_lock_spin_always(&c_seg->c_lock);
2067
2068 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
2069
2070 lck_mtx_unlock_always(&c_seg->c_lock);
2071 }
2072 c_seg = c_seg_next;
2073 }
2074 }
2075 vm_compressor_compact_and_swap(FALSE);
2076
2077 compaction_swapper_running = 0;
2078
2079 vm_swapout_ripe_segments = FALSE;
2080
2081 lck_mtx_unlock_always(c_list_lock);
2082
2083 thread_wakeup((event_t)&compaction_swapper_running);
2084}
2085
2086
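/*
 * Called from the pageout path: wake the compaction/swap trigger thread
 * when queued minor compactions, a need to swap, or a significant amount
 * of reclaimable space (more than 10% of the compressor object) justifies
 * it.  Also arranges for one-time initialization of the swapper thread.
 */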
2087void
2088vm_consider_waking_compactor_swapper(void)
2089{
2090 boolean_t need_wakeup = FALSE;
2091
2092 if (c_segment_count == 0)
2093 return;
2094
2095 if (compaction_swapper_running || compaction_swapper_awakened)
2096 return;
2097
2098 if (!compaction_swapper_inited && !compaction_swapper_init_now) {
2099 compaction_swapper_init_now = 1;
2100 need_wakeup = TRUE;
2101 }
2102
2103 if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) {
2104
2105 need_wakeup = TRUE;
2106
2107 } else if (compressor_needs_to_swap()) {
2108
2109 need_wakeup = TRUE;
2110
2111 } else if (c_minor_count) {
2112 uint64_t total_bytes;
2113
2114 total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64;
2115
2116 if ((total_bytes - compressor_bytes_used) > total_bytes / 10)
2117 need_wakeup = TRUE;
2118 }
2119 if (need_wakeup == TRUE) {
2120
2121 lck_mtx_lock_spin_always(c_list_lock);
2122
2123 fastwake_warmup = FALSE;
2124
2125 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2126 memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE);
2127
2128 compaction_swapper_awakened = 1;
2129 thread_wakeup((event_t)&c_compressor_swap_trigger);
2130 }
2131 lck_mtx_unlock_always(c_list_lock);
2132 }
2133}
2134
2135
2136#define C_SWAPOUT_LIMIT 4
2137#define DELAYED_COMPACTIONS_PER_PASS 30
2138
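/*
 * Perform the minor compactions queued on c_minor_list_head.  Called and
 * returns with c_list_lock held.  When swap is active, every
 * DELAYED_COMPACTIONS_PER_PASS segments we check whether a swapout is
 * needed and, if so, stop so that the swapout can proceed.
 */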
2139void
2140vm_compressor_do_delayed_compactions(boolean_t flush_all)
2141{
2142 c_segment_t c_seg;
2143 int number_compacted = 0;
2144 boolean_t needs_to_swap = FALSE;
2145
2146
2147#if !CONFIG_EMBEDDED
2148 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
2149#endif /* !CONFIG_EMBEDDED */
2150
2151 while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) {
2152
2153 c_seg = (c_segment_t)queue_first(&c_minor_list_head);
2154
2155 lck_mtx_lock_spin_always(&c_seg->c_lock);
2156
2157 if (c_seg->c_busy) {
2158
2159 lck_mtx_unlock_always(c_list_lock);
2160 c_seg_wait_on_busy(c_seg);
2161 lck_mtx_lock_spin_always(c_list_lock);
2162
2163 continue;
2164 }
2165 C_SEG_BUSY(c_seg);
2166
2167 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE);
2168
2169 if (VM_CONFIG_SWAP_IS_ACTIVE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) {
2170
2171 if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT)
2172 needs_to_swap = TRUE;
2173
2174 number_compacted = 0;
2175 }
2176 lck_mtx_lock_spin_always(c_list_lock);
2177 }
2178}
2179
2180
2181#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10
2182
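/*
 * Move segments that have sat on the swapped-in queue for longer than
 * C_SEGMENT_SWAPPEDIN_AGE_LIMIT seconds (or all of them when flushing)
 * back onto the age queue.  Called with c_list_lock held.
 */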
2183static void
2184vm_compressor_age_swapped_in_segments(boolean_t flush_all)
2185{
2186 c_segment_t c_seg;
2187 clock_sec_t now;
2188 clock_nsec_t nsec;
2189
2190 clock_get_system_nanotime(&now, &nsec);
2191
2192 while (!queue_empty(&c_swappedin_list_head)) {
2193
2194 c_seg = (c_segment_t)queue_first(&c_swappedin_list_head);
2195
2196 if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT)
2197 break;
2198
2199 lck_mtx_lock_spin_always(&c_seg->c_lock);
2200
2201 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
2202
2203 lck_mtx_unlock_always(&c_seg->c_lock);
2204 }
2205}
2206
2207
2208extern int vm_num_swap_files;
2209extern int vm_num_pinned_swap_files;
2210extern int vm_swappin_enabled;
2211
2212extern unsigned int vm_swapfile_total_segs_used;
2213extern unsigned int vm_swapfile_total_segs_alloced;
2214
2215
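/*
 * Hibernation support: push compressed data out to swap before the
 * hibernate image is written.  Quiesces the compaction swapper, sets a
 * generation-id flush barrier, runs a full compact-and-swap pass and then
 * waits (with a timeout) for the swapout queue to drain.
 */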
2216void
2217vm_compressor_flush(void)
2218{
2219 uint64_t vm_swap_put_failures_at_start;
2220 wait_result_t wait_result = 0;
2221 AbsoluteTime startTime, endTime;
2222 clock_sec_t now_sec;
2223 clock_nsec_t now_nsec;
2224 uint64_t nsec;
2225
2226 HIBLOG("vm_compressor_flush - starting\n");
2227
2228 clock_get_uptime(&startTime);
2229
2230 lck_mtx_lock_spin_always(c_list_lock);
2231
2232 fastwake_warmup = FALSE;
2233 compaction_swapper_abort = 1;
2234
2235 while (compaction_swapper_running) {
2236 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2237
2238 lck_mtx_unlock_always(c_list_lock);
2239
2240 thread_block(THREAD_CONTINUE_NULL);
2241
2242 lck_mtx_lock_spin_always(c_list_lock);
2243 }
2244 compaction_swapper_abort = 0;
2245 compaction_swapper_running = 1;
2246
2247 hibernate_flushing = TRUE;
2248 hibernate_no_swapspace = FALSE;
2249 c_generation_id_flush_barrier = c_generation_id + 1000;
2250
2251 clock_get_system_nanotime(&now_sec, &now_nsec);
2252 hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE;
2253
2254 vm_swap_put_failures_at_start = vm_swap_put_failures;
2255
2256 vm_compressor_compact_and_swap(TRUE);
2257
2258 while (!queue_empty(&c_swapout_list_head)) {
2259
2260 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
2261
2262 lck_mtx_unlock_always(c_list_lock);
2263
2264 wait_result = thread_block(THREAD_CONTINUE_NULL);
2265
2266 lck_mtx_lock_spin_always(c_list_lock);
2267
2268 if (wait_result == THREAD_TIMED_OUT)
2269 break;
2270 }
2271 hibernate_flushing = FALSE;
2272 compaction_swapper_running = 0;
2273
2274 if (vm_swap_put_failures > vm_swap_put_failures_at_start)
2275 HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n",
2276 vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT);
2277
2278 lck_mtx_unlock_always(c_list_lock);
2279
2280 thread_wakeup((event_t)&compaction_swapper_running);
2281
2282 clock_get_uptime(&endTime);
2283 SUB_ABSOLUTETIME(&endTime, &startTime);
2284 absolutetime_to_nanoseconds(endTime, &nsec);
2285
2286 HIBLOG("vm_compressor_flush completed - took %qd msecs - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d, vm_swappin_enabled = %d\n",
2287 nsec / 1000000ULL, vm_num_swap_files, vm_num_pinned_swap_files, vm_swappin_enabled);
2288}
2289
2290
2291int compaction_swap_trigger_thread_awakened = 0;
2292
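/*
 * Body of the compaction/swap trigger thread: on each wakeup it performs
 * one vm_compressor_compact_and_swap() pass (unless one is already in
 * progress) and then blocks on c_compressor_swap_trigger.
 */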
2293static void
2294vm_compressor_swap_trigger_thread(void)
2295{
2296 current_thread()->options |= TH_OPT_VMPRIV;
2297
2298 /*
2299 * compaction_swapper_init_now is set when the first call to
2300 * vm_consider_waking_compactor_swapper is made from
2301 * vm_pageout_scan... since this function is called upon
2302 * thread creation, we want to make sure to delay adjusting
2303 * the tuneables until we are awakened via vm_pageout_scan
2304 * so that we are at a point where the vm_swapfile_open will
2305 * be operating on the correct directory (in case the default
 2306 * of /var/vm/ is overridden by the dynamic_pager).
2307 */
2308 if (compaction_swapper_init_now) {
2309 vm_compaction_swapper_do_init();
2310
2311 if (vm_restricted_to_single_processor == TRUE)
2312 thread_vm_bind_group_add();
2313 thread_set_thread_name(current_thread(), "VM_cswap_trigger");
2314 compaction_swapper_init_now = 0;
2315 }
2316 lck_mtx_lock_spin_always(c_list_lock);
2317
2318 compaction_swap_trigger_thread_awakened++;
2319 compaction_swapper_awakened = 0;
2320
2321 if (compaction_swapper_running == 0) {
2322
2323 compaction_swapper_running = 1;
2324
2325 vm_compressor_compact_and_swap(FALSE);
2326
2327 compaction_swapper_running = 0;
2328 }
2329 assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT);
2330
2331 if (compaction_swapper_running == 0)
2332 thread_wakeup((event_t)&compaction_swapper_running);
2333
2334 lck_mtx_unlock_always(c_list_lock);
2335
2336 thread_block((thread_continue_t)vm_compressor_swap_trigger_thread);
2337
2338 /* NOTREACHED */
2339}
2340
2341
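/*
 * Mark the start of the fastwake warmup recording window: remember the
 * generation id of the newest c_seg so that, together with
 * vm_compressor_record_warmup_end(), we know which range of segments to
 * prefetch on wake.
 */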
2342void
2343vm_compressor_record_warmup_start(void)
2344{
2345 c_segment_t c_seg;
2346
2347 lck_mtx_lock_spin_always(c_list_lock);
2348
2349 if (first_c_segment_to_warm_generation_id == 0) {
2350 if (!queue_empty(&c_age_list_head)) {
2351
2352 c_seg = (c_segment_t)queue_last(&c_age_list_head);
2353
2354 first_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2355 } else
2356 first_c_segment_to_warm_generation_id = 0;
2357
2358 fastwake_recording_in_progress = TRUE;
2359 }
2360 lck_mtx_unlock_always(c_list_lock);
2361}
2362
2363
2364void
2365vm_compressor_record_warmup_end(void)
2366{
2367 c_segment_t c_seg;
2368
2369 lck_mtx_lock_spin_always(c_list_lock);
2370
2371 if (fastwake_recording_in_progress == TRUE) {
2372
2373 if (!queue_empty(&c_age_list_head)) {
2374
2375 c_seg = (c_segment_t)queue_last(&c_age_list_head);
2376
2377 last_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2378 } else
2379 last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id;
2380
2381 fastwake_recording_in_progress = FALSE;
2382
2383 HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2384 }
2385 lck_mtx_unlock_always(c_list_lock);
2386}
2387
2388
2389#define DELAY_TRIM_ON_WAKE_SECS 25
2390
2391void
2392vm_compressor_delay_trim(void)
2393{
2394 clock_sec_t sec;
2395 clock_nsec_t nsec;
2396
2397 clock_get_system_nanotime(&sec, &nsec);
2398 dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS;
2399}
2400
2401
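/*
 * Kick the compaction/swap trigger thread with fastwake_warmup set so the
 * segments recorded between warmup start and end get swapped back in.
 * Nothing to do if no warmup window was recorded.
 */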
2402void
2403vm_compressor_do_warmup(void)
2404{
2405 lck_mtx_lock_spin_always(c_list_lock);
2406
2407 if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) {
2408 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2409
2410 lck_mtx_unlock_always(c_list_lock);
2411 return;
2412 }
2413
2414 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
2415
2416 fastwake_warmup = TRUE;
2417
2418 compaction_swapper_awakened = 1;
2419 thread_wakeup((event_t)&c_compressor_swap_trigger);
2420 }
2421 lck_mtx_unlock_always(c_list_lock);
2422}
2423
2424void
2425do_fastwake_warmup_all(void)
2426{
2427
2428 lck_mtx_lock_spin_always(c_list_lock);
2429
2430 if (queue_empty(&c_swappedout_list_head) && queue_empty(&c_swappedout_sparse_list_head)) {
2431
2432 lck_mtx_unlock_always(c_list_lock);
2433 return;
2434 }
2435
2436 fastwake_warmup = TRUE;
2437
2438 do_fastwake_warmup(&c_swappedout_list_head, TRUE);
2439
2440 do_fastwake_warmup(&c_swappedout_sparse_list_head, TRUE);
2441
2442 fastwake_warmup = FALSE;
2443
2444 lck_mtx_unlock_always(c_list_lock);
2445
2446}
2447
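/*
 * Swap the recorded warmup window of segments back in (or, when
 * consider_all_cseg is TRUE, everything on the given queue).  The
 * recorded-window case also backs off if free memory drops too low.
 * Runs at the compressor tier-2 I/O throttle; called and returns with
 * c_list_lock held.
 */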
2448void
2449do_fastwake_warmup(queue_head_t *c_queue, boolean_t consider_all_cseg)
2450{
2451 c_segment_t c_seg = NULL;
2452 AbsoluteTime startTime, endTime;
2453 uint64_t nsec;
2454
2455
2456 HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2457
2458 clock_get_uptime(&startTime);
2459
2460 lck_mtx_unlock_always(c_list_lock);
2461
2462 proc_set_thread_policy(current_thread(),
2463 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
2464
2465 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2466
2467 lck_mtx_lock_spin_always(c_list_lock);
2468
2469 while (!queue_empty(c_queue) && fastwake_warmup == TRUE) {
2470
2471 c_seg = (c_segment_t) queue_first(c_queue);
2472
2473 if (consider_all_cseg == FALSE) {
2474 if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id ||
2475 c_seg->c_generation_id > last_c_segment_to_warm_generation_id)
2476 break;
2477
2478 if (vm_page_free_count < (AVAILABLE_MEMORY / 4))
2479 break;
2480 }
2481
2482 lck_mtx_lock_spin_always(&c_seg->c_lock);
2483 lck_mtx_unlock_always(c_list_lock);
2484
2485 if (c_seg->c_busy) {
2486 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2487 c_seg_wait_on_busy(c_seg);
2488 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2489 } else {
2490 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
2491 lck_mtx_unlock_always(&c_seg->c_lock);
2492 c_segment_warmup_count++;
2493
2494 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2495 vm_pageout_io_throttle();
2496 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2497 }
2498 lck_mtx_lock_spin_always(c_list_lock);
2499 }
2500 lck_mtx_unlock_always(c_list_lock);
2501
2502 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2503
2504 proc_set_thread_policy(current_thread(),
2505 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0);
2506
2507 clock_get_uptime(&endTime);
2508 SUB_ABSOLUTETIME(&endTime, &startTime);
2509 absolutetime_to_nanoseconds(endTime, &nsec);
2510
2511 HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL);
2512
2513 lck_mtx_lock_spin_always(c_list_lock);
2514
2515 if (consider_all_cseg == FALSE) {
2516 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2517 }
2518}
2519
2520
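/*
 * The main compaction engine.  Handles any pending fastwake warmup, drains
 * delayed minor compactions, then walks the age queue performing major
 * compactions; each compacted segment is moved to the swapout queue (when
 * swap is active, or when it is ripe for swap-behind) or to the
 * majorcompact queue.  Called with c_list_lock held; flush_all is used by
 * the hibernation flush path.
 */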
2521void
2522vm_compressor_compact_and_swap(boolean_t flush_all)
2523{
2524 c_segment_t c_seg, c_seg_next;
2525 boolean_t keep_compacting;
2526 clock_sec_t now;
2527 clock_nsec_t nsec;
2528
2529
2530 if (fastwake_warmup == TRUE) {
2531 uint64_t starting_warmup_count;
2532
2533 starting_warmup_count = c_segment_warmup_count;
2534
2535 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count,
2536 first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0);
2537 do_fastwake_warmup(&c_swappedout_list_head, FALSE);
2538 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0);
2539
2540 fastwake_warmup = FALSE;
2541 }
2542
2543 /*
2544 * it's possible for the c_age_list_head to be empty if we
2545 * hit our limits for growing the compressor pool and we subsequently
2546 * hibernated... on the next hibernation we could see the queue as
 2547 * empty and not proceed even though we have a bunch of segments on
2548 * the swapped in queue that need to be dealt with.
2549 */
2550 vm_compressor_do_delayed_compactions(flush_all);
2551
2552 vm_compressor_age_swapped_in_segments(flush_all);
2553
2554 /*
2555 * we only need to grab the timestamp once per
2556 * invocation of this function since the
2557 * timescale we're interested in is measured
2558 * in days
2559 */
2560 clock_get_system_nanotime(&now, &nsec);
2561
2562 while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) {
2563
2564 if (hibernate_flushing == TRUE) {
2565 clock_sec_t sec;
2566
2567 if (hibernate_should_abort()) {
2568 HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n");
2569 break;
2570 }
2571 if (hibernate_no_swapspace == TRUE) {
2572 HIBLOG("vm_compressor_flush - out of swap space\n");
2573 break;
2574 }
2575 if (vm_swap_files_pinned() == FALSE) {
2576 HIBLOG("vm_compressor_flush - unpinned swap files\n");
2577 break;
2578 }
2579 if (hibernate_in_progress_with_pinned_swap == TRUE &&
2580 (vm_swapfile_total_segs_alloced == vm_swapfile_total_segs_used)) {
2581 HIBLOG("vm_compressor_flush - out of pinned swap space\n");
2582 break;
2583 }
2584 clock_get_system_nanotime(&sec, &nsec);
2585
2586 if (sec > hibernate_flushing_deadline) {
2587 HIBLOG("vm_compressor_flush - failed to finish before deadline\n");
2588 break;
2589 }
2590 }
2591 if (c_swapout_count >= C_SWAPOUT_LIMIT) {
2592
2593 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000*NSEC_PER_USEC);
2594
2595 lck_mtx_unlock_always(c_list_lock);
2596
2597 thread_block(THREAD_CONTINUE_NULL);
2598
2599 lck_mtx_lock_spin_always(c_list_lock);
2600 }
2601 /*
2602 * Minor compactions
2603 */
2604 vm_compressor_do_delayed_compactions(flush_all);
2605
2606 vm_compressor_age_swapped_in_segments(flush_all);
2607
2608 if (c_swapout_count >= C_SWAPOUT_LIMIT) {
2609 /*
 2610 * we timed out on the above thread_block...
 2611 * let's loop around and try again; the
 2612 * timeout allows us to continue
 2613 * to do minor compactions to make
 2614 * more memory available
2615 */
2616 continue;
2617 }
2618
2619 /*
2620 * Swap out segments?
2621 */
2622 if (flush_all == FALSE) {
2623 boolean_t needs_to_swap;
2624
2625 lck_mtx_unlock_always(c_list_lock);
2626
2627 needs_to_swap = compressor_needs_to_swap();
2628
2629#if !CONFIG_EMBEDDED
2630 if (needs_to_swap == TRUE && vm_swap_low_on_space())
2631 vm_compressor_take_paging_space_action();
2632#endif /* !CONFIG_EMBEDDED */
2633
2634 lck_mtx_lock_spin_always(c_list_lock);
2635
2636 if (needs_to_swap == FALSE)
2637 break;
2638 }
2639 if (queue_empty(&c_age_list_head))
2640 break;
2641 c_seg = (c_segment_t) queue_first(&c_age_list_head);
2642
2643 assert(c_seg->c_state == C_ON_AGE_Q);
2644
2645 if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier)
2646 break;
2647
2648 lck_mtx_lock_spin_always(&c_seg->c_lock);
2649
2650 if (c_seg->c_busy) {
2651
2652 lck_mtx_unlock_always(c_list_lock);
2653 c_seg_wait_on_busy(c_seg);
2654 lck_mtx_lock_spin_always(c_list_lock);
2655
2656 continue;
2657 }
2658 C_SEG_BUSY(c_seg);
2659
2660 if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) {
2661 /*
2662 * found an empty c_segment and freed it
2663 * so go grab the next guy in the queue
2664 */
2665 c_seg_major_compact_stats.count_of_freed_segs++;
2666 continue;
2667 }
2668 /*
2669 * Major compaction
2670 */
2671 keep_compacting = TRUE;
2672
2673 while (keep_compacting == TRUE) {
2674
2675 assert(c_seg->c_busy);
2676
2677 /* look for another segment to consolidate */
2678
2679 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
2680
2681 if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next))
2682 break;
2683
2684 assert(c_seg_next->c_state == C_ON_AGE_Q);
2685
2686 if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE)
2687 break;
2688
2689 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
2690
2691 if (c_seg_next->c_busy) {
2692
2693 lck_mtx_unlock_always(c_list_lock);
2694 c_seg_wait_on_busy(c_seg_next);
2695 lck_mtx_lock_spin_always(c_list_lock);
2696
2697 continue;
2698 }
2699 /* grab that segment */
2700 C_SEG_BUSY(c_seg_next);
2701
2702 if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) {
2703 /*
2704 * found an empty c_segment and freed it
2705 * so we can't continue to use c_seg_next
2706 */
2707 c_seg_major_compact_stats.count_of_freed_segs++;
2708 continue;
2709 }
2710
2711 /* unlock the list ... */
2712 lck_mtx_unlock_always(c_list_lock);
2713
2714 /* do the major compaction */
2715
2716 keep_compacting = c_seg_major_compact(c_seg, c_seg_next);
2717
2718 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2719
2720 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
2721 /*
2722 * run a minor compaction on the donor segment
 2723 * since we pulled at least some of its
2724 * data into our target... if we've emptied
2725 * it, now is a good time to free it which
2726 * c_seg_minor_compaction_and_unlock also takes care of
2727 *
2728 * by passing TRUE, we ask for c_busy to be cleared
2729 * and c_wanted to be taken care of
2730 */
2731 if (c_seg_minor_compaction_and_unlock(c_seg_next, TRUE))
2732 c_seg_major_compact_stats.count_of_freed_segs++;
2733
2734 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2735
2736 /* relock the list */
2737 lck_mtx_lock_spin_always(c_list_lock);
2738
2739 } /* major compaction */
2740
2741 lck_mtx_lock_spin_always(&c_seg->c_lock);
2742
2743 assert(c_seg->c_busy);
2744 assert(!c_seg->c_on_minorcompact_q);
2745
2746 if (VM_CONFIG_SWAP_IS_ACTIVE) {
2747 /*
2748 * This mode of putting a generic c_seg on the swapout list is
2749 * only supported when we have general swapping enabled
2750 */
2751 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
2752 } else {
2753 if ((vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit)) {
2754
2755 assert(VM_CONFIG_SWAP_IS_PRESENT);
2756 /*
2757 * we are running compressor sweeps with swap-behind
2758 * make sure the c_seg has aged enough before swapping it
2759 * out...
2760 */
2761 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
2762 c_seg->c_overage_swap = TRUE;
2763 c_overage_swapped_count++;
2764 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
2765 }
2766 }
2767 }
2768 if (c_seg->c_state == C_ON_AGE_Q) {
2769 /*
2770 * this c_seg didn't get moved to the swapout queue
2771 * so we need to move it out of the way...
2772 * we just did a major compaction on it so put it
2773 * on that queue
2774 */
2775 c_seg_switch_state(c_seg, C_ON_MAJORCOMPACT_Q, FALSE);
2776 } else {
2777 c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used;
2778 c_seg_major_compact_stats.count_of_swapouts++;
2779 }
2780 C_SEG_WAKEUP_DONE(c_seg);
2781
2782 lck_mtx_unlock_always(&c_seg->c_lock);
2783
2784 if (c_swapout_count) {
2785 lck_mtx_unlock_always(c_list_lock);
2786
2787 thread_wakeup((event_t)&c_swapout_list_head);
2788
2789 lck_mtx_lock_spin_always(c_list_lock);
2790 }
2791 }
2792}
2793
2794
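/*
 * Return the c_segment currently being filled for this compression head,
 * allocating and initializing a new one (and growing the c_segments array
 * if necessary) when there isn't one.  Returns NULL if the compressor pool
 * has hit its limits.  On success the segment is returned locked, with
 * enough of its buffer populated to accept another page.
 */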
2795static c_segment_t
2796c_seg_allocate(c_segment_t *current_chead)
2797{
2798 c_segment_t c_seg;
2799 int min_needed;
2800 int size_to_populate;
2801
2802#if !CONFIG_EMBEDDED
2803 if (vm_compressor_low_on_space())
2804 vm_compressor_take_paging_space_action();
2805#endif /* !CONFIG_EMBEDDED */
2806
2807 if ( (c_seg = *current_chead) == NULL ) {
2808 uint32_t c_segno;
2809
2810 lck_mtx_lock_spin_always(c_list_lock);
2811
2812 while (c_segments_busy == TRUE) {
2813 assert_wait((event_t) (&c_segments_busy), THREAD_UNINT);
2814
2815 lck_mtx_unlock_always(c_list_lock);
2816
2817 thread_block(THREAD_CONTINUE_NULL);
2818
2819 lck_mtx_lock_spin_always(c_list_lock);
2820 }
2821 if (c_free_segno_head == (uint32_t)-1) {
2822 uint32_t c_segments_available_new;
2823
2824 if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) {
2825 lck_mtx_unlock_always(c_list_lock);
2826
2827 return (NULL);
2828 }
2829 c_segments_busy = TRUE;
2830 lck_mtx_unlock_always(c_list_lock);
2831
2832 kernel_memory_populate(compressor_map, (vm_offset_t)c_segments_next_page,
2833 PAGE_SIZE, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
2834 c_segments_next_page += PAGE_SIZE;
2835
2836 c_segments_available_new = c_segments_available + C_SEGMENTS_PER_PAGE;
2837
2838 if (c_segments_available_new > c_segments_limit)
2839 c_segments_available_new = c_segments_limit;
2840
2841 for (c_segno = c_segments_available + 1; c_segno < c_segments_available_new; c_segno++)
2842 c_segments[c_segno - 1].c_segno = c_segno;
2843
2844 lck_mtx_lock_spin_always(c_list_lock);
2845
2846 c_segments[c_segno - 1].c_segno = c_free_segno_head;
2847 c_free_segno_head = c_segments_available;
2848 c_segments_available = c_segments_available_new;
2849
2850 c_segments_busy = FALSE;
2851 thread_wakeup((event_t) (&c_segments_busy));
2852 }
2853 c_segno = c_free_segno_head;
2854 assert(c_segno >= 0 && c_segno < c_segments_limit);
2855
2856 c_free_segno_head = (uint32_t)c_segments[c_segno].c_segno;
2857
2858 /*
2859 * do the rest of the bookkeeping now while we're still behind
2860 * the list lock and grab our generation id now into a local
2861 * so that we can install it once we have the c_seg allocated
2862 */
2863 c_segment_count++;
2864 if (c_segment_count > c_segment_count_max)
2865 c_segment_count_max = c_segment_count;
2866
2867 lck_mtx_unlock_always(c_list_lock);
2868
2869 c_seg = (c_segment_t)zalloc(compressor_segment_zone);
2870 bzero((char *)c_seg, sizeof(struct c_segment));
2871
2872 c_seg->c_store.c_buffer = (int32_t *)C_SEG_BUFFER_ADDRESS(c_segno);
2873
2874 lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
2875
2876 c_seg->c_state = C_IS_EMPTY;
2877 c_seg->c_firstemptyslot = C_SLOT_MAX_INDEX;
2878 c_seg->c_mysegno = c_segno;
2879
2880 lck_mtx_lock_spin_always(c_list_lock);
2881 c_empty_count++;
2882 c_seg_switch_state(c_seg, C_IS_FILLING, FALSE);
2883 c_segments[c_segno].c_seg = c_seg;
2884 assert(c_segments[c_segno].c_segno > c_segments_available);
2885 lck_mtx_unlock_always(c_list_lock);
2886
2887 *current_chead = c_seg;
2888
2889#if DEVELOPMENT || DEBUG
2890 C_SEG_MAKE_WRITEABLE(c_seg);
2891#endif
2892
2893 }
2894 c_seg_alloc_nextslot(c_seg);
2895
2896 size_to_populate = C_SEG_ALLOCSIZE - C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset);
2897
2898 if (size_to_populate) {
2899
2900 min_needed = PAGE_SIZE + (C_SEG_ALLOCSIZE - C_SEG_BUFSIZE);
2901
2902 if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset) < (unsigned) min_needed) {
2903
2904 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
2905 size_to_populate = C_SEG_MAX_POPULATE_SIZE;
2906 vm_compressor_pages_grabbed += size_to_populate / PAGE_SIZE;
2907
2908 kernel_memory_populate(compressor_map,
2909 (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
2910 size_to_populate,
2911 KMA_COMPRESSOR,
2912 VM_KERN_MEMORY_COMPRESSOR);
2913 } else
2914 size_to_populate = 0;
2915 }
2916 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2917
2918 lck_mtx_lock_spin_always(&c_seg->c_lock);
2919
2920 if (size_to_populate)
2921 c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
2922
2923 return (c_seg);
2924}
2925
2926
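/*
 * Called with the segment locked when the segment being filled can't take
 * any more data: depopulate the unused tail pages, timestamp the segment,
 * assign its generation id and move it to the age queue (or directly to
 * the swapout queue in the freezer case), then clear *current_chead.
 */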
2927static void
2928c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
2929{
2930 uint32_t unused_bytes;
2931 uint32_t offset_to_depopulate;
2932 int new_state = C_ON_AGE_Q;
2933 clock_sec_t sec;
2934 clock_nsec_t nsec;
2935
2936 unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset));
2937
2938#ifndef _OPEN_SOURCE
2939 /* TODO: The HW codec can generate, lazily, a '2nd page not mapped'
2940 * exception. So on such a platform, or platforms where we're confident
2941 * the codec does not require a buffer page to absorb trailing writes,
2942 * we can create an unmapped hole at the tail of the segment, rather
2943 * than a populated mapping. This will also guarantee that the codec
2944 * does not overwrite valid data past the edge of the segment and
2945 * thus eliminate the depopulation overhead.
2946 */
2947#endif
2948 if (unused_bytes) {
2949 offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset)));
2950
2951 /*
2952 * release the extra physical page(s) at the end of the segment
2953 */
2954 lck_mtx_unlock_always(&c_seg->c_lock);
2955
2956 kernel_memory_depopulate(
2957 compressor_map,
2958 (vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate],
2959 unused_bytes,
2960 KMA_COMPRESSOR);
2961
2962 lck_mtx_lock_spin_always(&c_seg->c_lock);
2963
2964 c_seg->c_populated_offset = offset_to_depopulate;
2965 }
2966 assert(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) <= C_SEG_BUFSIZE);
2967
2968#if DEVELOPMENT || DEBUG
2969 {
2970 boolean_t c_seg_was_busy = FALSE;
2971
2972 if ( !c_seg->c_busy)
2973 C_SEG_BUSY(c_seg);
2974 else
2975 c_seg_was_busy = TRUE;
2976
2977 lck_mtx_unlock_always(&c_seg->c_lock);
2978
2979 C_SEG_WRITE_PROTECT(c_seg);
2980
2981 lck_mtx_lock_spin_always(&c_seg->c_lock);
2982
2983 if (c_seg_was_busy == FALSE)
2984 C_SEG_WAKEUP_DONE(c_seg);
2985 }
2986#endif
2987
2988#if CONFIG_FREEZE
2989 if (current_chead == (c_segment_t*)&freezer_chead &&
2990 VM_CONFIG_SWAP_IS_PRESENT &&
2991 VM_CONFIG_FREEZER_SWAP_IS_ACTIVE &&
2992 c_freezer_swapout_count < VM_MAX_FREEZER_CSEG_SWAP_COUNT) {
2993 new_state = C_ON_SWAPOUT_Q;
2994 }
2995#endif /* CONFIG_FREEZE */
2996
2997 clock_get_system_nanotime(&sec, &nsec);
2998 c_seg->c_creation_ts = (uint32_t)sec;
2999
3000 lck_mtx_lock_spin_always(c_list_lock);
3001
3002#if CONFIG_FREEZE
3003 if (c_seg->c_state == C_ON_SWAPOUT_Q)
3004 c_freezer_swapout_count++;
3005#endif /* CONFIG_FREEZE */
3006
3007 c_seg->c_generation_id = c_generation_id++;
3008 c_seg_switch_state(c_seg, new_state, FALSE);
3009
3010 if (c_seg->c_state == C_ON_AGE_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
3011 c_seg_need_delayed_compaction(c_seg, TRUE);
3012
3013 lck_mtx_unlock_always(c_list_lock);
3014
3015#if CONFIG_FREEZE
3016 if (c_seg->c_state == C_ON_SWAPOUT_Q)
3017 thread_wakeup((event_t)&c_swapout_list_head);
3018#endif /* CONFIG_FREEZE */
3019
3020 *current_chead = NULL;
3021}
3022
3023
3024/*
3025 * returns with c_seg locked
3026 */
3027void
3028c_seg_swapin_requeue(c_segment_t c_seg, boolean_t has_data, boolean_t minor_compact_ok, boolean_t age_on_swapin_q)
3029{
3030 clock_sec_t sec;
3031 clock_nsec_t nsec;
3032
3033 clock_get_system_nanotime(&sec, &nsec);
3034
3035 lck_mtx_lock_spin_always(c_list_lock);
3036 lck_mtx_lock_spin_always(&c_seg->c_lock);
3037
3038 assert(c_seg->c_busy_swapping);
3039 assert(c_seg->c_busy);
3040
3041 c_seg->c_busy_swapping = 0;
3042
3043 if (c_seg->c_overage_swap == TRUE) {
3044 c_overage_swapped_count--;
3045 c_seg->c_overage_swap = FALSE;
3046 }
3047 if (has_data == TRUE) {
3048 if (age_on_swapin_q == TRUE)
3049 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
3050 else
3051 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
3052
3053 if (minor_compact_ok == TRUE && !c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
3054 c_seg_need_delayed_compaction(c_seg, TRUE);
3055 } else {
3056 c_seg->c_store.c_buffer = (int32_t*) NULL;
3057 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
3058
3059 c_seg_switch_state(c_seg, C_ON_BAD_Q, FALSE);
3060 }
3061 c_seg->c_swappedin_ts = (uint32_t)sec;
3062
3063 lck_mtx_unlock_always(c_list_lock);
3064}
3065
3066
3067
3068/*
3069 * c_seg has to be locked and is returned locked if the c_seg isn't freed
 3070 * PAGE_REPLACEMENT_DISALLOWED has to be TRUE on entry and is returned TRUE
3071 * c_seg_swapin returns 1 if the c_seg was freed, 0 otherwise
3072 */
3073
3074int
3075c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction, boolean_t age_on_swapin_q)
3076{
3077 vm_offset_t addr = 0;
3078 uint32_t io_size = 0;
3079 uint64_t f_offset;
3080
3081 assert(C_SEG_IS_ONDISK(c_seg));
3082
3083#if !CHECKSUM_THE_SWAP
3084 c_seg_trim_tail(c_seg);
3085#endif
3086 io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
3087 f_offset = c_seg->c_store.c_swap_handle;
3088
3089 C_SEG_BUSY(c_seg);
3090 c_seg->c_busy_swapping = 1;
3091
3092 /*
3093 * This thread is likely going to block for I/O.
3094 * Make sure it is ready to run when the I/O completes because
3095 * it needs to clear the busy bit on the c_seg so that other
3096 * waiting threads can make progress too. To do that, boost
3097 * the rwlock_count so that the priority is boosted.
3098 */
3099 set_thread_rwlock_boost();
3100 lck_mtx_unlock_always(&c_seg->c_lock);
3101
3102 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3103
3104 addr = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
3105 c_seg->c_store.c_buffer = (int32_t*) addr;
3106
3107 kernel_memory_populate(compressor_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
3108
3109 if (vm_swap_get(c_seg, f_offset, io_size) != KERN_SUCCESS) {
3110 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3111
3112 kernel_memory_depopulate(compressor_map, addr, io_size, KMA_COMPRESSOR);
3113
3114 c_seg_swapin_requeue(c_seg, FALSE, TRUE, age_on_swapin_q);
3115 } else {
3116#if ENCRYPTED_SWAP
3117 vm_swap_decrypt(c_seg);
3118#endif /* ENCRYPTED_SWAP */
3119
3120#if CHECKSUM_THE_SWAP
3121 if (c_seg->cseg_swap_size != io_size)
3122 panic("swapin size doesn't match swapout size");
3123
3124 if (c_seg->cseg_hash != vmc_hash((char*) c_seg->c_store.c_buffer, (int)io_size)) {
3125 panic("c_seg_swapin - Swap hash mismatch\n");
3126 }
3127#endif /* CHECKSUM_THE_SWAP */
3128
3129 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3130
3131 c_seg_swapin_requeue(c_seg, TRUE, force_minor_compaction == TRUE ? FALSE : TRUE, age_on_swapin_q);
3132
3133 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
3134
3135 if (force_minor_compaction == TRUE) {
3136 if (c_seg_minor_compaction_and_unlock(c_seg, FALSE)) {
3137 /*
3138 * c_seg was completely empty so it was freed,
3139 * so be careful not to reference it again
3140 *
3141 * Drop the rwlock_count so that the thread priority
3142 * is returned back to where it is supposed to be.
3143 */
3144 clear_thread_rwlock_boost();
3145 return (1);
3146 }
3147
3148 lck_mtx_lock_spin_always(&c_seg->c_lock);
3149 }
3150 }
3151 C_SEG_WAKEUP_DONE(c_seg);
3152
3153 /*
3154 * Drop the rwlock_count so that the thread priority
3155 * is returned back to where it is supposed to be.
3156 */
3157 clear_thread_rwlock_boost();
3158
3159 return (0);
3160}
3161
3162
3163static void
3164c_segment_sv_hash_drop_ref(int hash_indx)
3165{
3166 struct c_sv_hash_entry o_sv_he, n_sv_he;
3167
3168 while (1) {
3169
3170 o_sv_he.he_record = c_segment_sv_hash_table[hash_indx].he_record;
3171
3172 n_sv_he.he_ref = o_sv_he.he_ref - 1;
3173 n_sv_he.he_data = o_sv_he.he_data;
3174
3175 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_indx].he_record) == TRUE) {
3176 if (n_sv_he.he_ref == 0)
3177 OSAddAtomic(-1, &c_segment_svp_in_hash);
3178 break;
3179 }
3180 }
3181}
3182
3183
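/*
 * Try to record a page consisting entirely of a single 32-bit value in the
 * single-value (sv) hash table, taking a reference on the matching entry.
 * Returns the hash index on success, or -1 if C_SV_HASH_MAX_MISS probes
 * fail to find a usable slot.
 */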
3184static int
3185c_segment_sv_hash_insert(uint32_t data)
3186{
3187 int hash_sindx;
3188 int misses;
3189 struct c_sv_hash_entry o_sv_he, n_sv_he;
3190 boolean_t got_ref = FALSE;
3191
3192 if (data == 0)
3193 OSAddAtomic(1, &c_segment_svp_zero_compressions);
3194 else
3195 OSAddAtomic(1, &c_segment_svp_nonzero_compressions);
3196
3197 hash_sindx = data & C_SV_HASH_MASK;
3198
3199 for (misses = 0; misses < C_SV_HASH_MAX_MISS; misses++)
3200 {
3201 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
3202
3203 while (o_sv_he.he_data == data || o_sv_he.he_ref == 0) {
3204 n_sv_he.he_ref = o_sv_he.he_ref + 1;
3205 n_sv_he.he_data = data;
3206
3207 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_sindx].he_record) == TRUE) {
3208 if (n_sv_he.he_ref == 1)
3209 OSAddAtomic(1, &c_segment_svp_in_hash);
3210 got_ref = TRUE;
3211 break;
3212 }
3213 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
3214 }
3215 if (got_ref == TRUE)
3216 break;
3217 hash_sindx++;
3218
3219 if (hash_sindx == C_SV_HASH_SIZE)
3220 hash_sindx = 0;
3221 }
3222 if (got_ref == FALSE)
3223 return(-1);
3224
3225 return (hash_sindx);
3226}
3227
3228
3229#if RECORD_THE_COMPRESSED_DATA
3230
3231static void
3232c_compressed_record_data(char *src, int c_size)
3233{
3234 if ((c_compressed_record_cptr + c_size + 4) >= c_compressed_record_ebuf)
3235 panic("c_compressed_record_cptr >= c_compressed_record_ebuf");
3236
3237 *(int *)((void *)c_compressed_record_cptr) = c_size;
3238
3239 c_compressed_record_cptr += 4;
3240
3241 memcpy(c_compressed_record_cptr, src, c_size);
3242 c_compressed_record_cptr += c_size;
3243}
3244#endif
3245
3246
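/*
 * Compress one page ('src') into the segment owned by *current_chead and
 * record its location in 'slot_ptr'.  Incompressible pages are stored
 * uncompressed, single-value pages go into the sv hash (or a 4-byte slot
 * if the hash is full), and a full segment is retired via
 * c_current_seg_filled().  Returns 0 on success, 1 if no segment could be
 * allocated.
 */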
3247static int
3248c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf)
3249{
3250 int c_size;
3251 int c_rounded_size = 0;
3252 int max_csize;
3253 c_slot_t cs;
3254 c_segment_t c_seg;
3255
3256 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0);
3257retry:
3258 if ((c_seg = c_seg_allocate(current_chead)) == NULL) {
3259 return (1);
3260 }
3261 /*
3262 * returns with c_seg lock held
3263 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
3264 * c_nextslot has been allocated and
3265 * c_store.c_buffer populated
3266 */
3267 assert(c_seg->c_state == C_IS_FILLING);
3268
3269 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot);
3270
3271 cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
3272 assert(slot_ptr == (c_slot_mapping_t)C_SLOT_UNPACK_PTR(cs));
3273
3274 cs->c_offset = c_seg->c_nextoffset;
3275
3276 max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset);
3277
3278 if (max_csize > PAGE_SIZE)
3279 max_csize = PAGE_SIZE;
3280
3281#if CHECKSUM_THE_DATA
3282 cs->c_hash_data = vmc_hash(src, PAGE_SIZE);
3283#endif
3284 boolean_t incomp_copy = FALSE;
3285 int max_csize_adj = (max_csize - 4);
3286
3287 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
3288#if defined(__arm__) || defined(__arm64__)
3289 uint16_t ccodec = CINVALID;
3290
3291 if (max_csize >= C_SEG_OFFSET_ALIGNMENT_BOUNDARY) {
3292 c_size = metacompressor((const uint8_t *) src,
3293 (uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
3294 max_csize_adj, &ccodec,
3295 scratch_buf, &incomp_copy);
3296#if C_SEG_OFFSET_ALIGNMENT_BOUNDARY > 4
3297 if (c_size > max_csize_adj) {
3298 c_size = -1;
3299 }
3300#endif
3301 } else {
3302 c_size = -1;
3303 }
3304 assert(ccodec == CCWK || ccodec == CCLZ4);
3305 cs->c_codec = ccodec;
3306#endif
3307 } else {
3308#if defined(__arm__) || defined(__arm64__)
3309 cs->c_codec = CCWK;
3310#endif
3311#if defined(__arm64__)
3312 __unreachable_ok_push
3313 if (PAGE_SIZE == 4096)
3314 c_size = WKdm_compress_4k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3315 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3316 else {
3317 c_size = WKdm_compress_16k((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3318 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3319 }
3320 __unreachable_ok_pop
3321#else
3322 c_size = WKdm_compress_new((const WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3323 (WK_word *)(uintptr_t)scratch_buf, max_csize_adj);
3324#endif
3325 }
3326 assertf(((c_size <= max_csize_adj) && (c_size >= -1)),
3327 "c_size invalid (%d, %d), cur compressions: %d", c_size, max_csize_adj, c_segment_pages_compressed);
3328
3329 if (c_size == -1) {
3330 if (max_csize < PAGE_SIZE) {
3331 c_current_seg_filled(c_seg, current_chead);
3332 assert(*current_chead == NULL);
3333
3334 lck_mtx_unlock_always(&c_seg->c_lock);
3335 /* TODO: it may be worth requiring codecs to distinguish
3336 * between incompressible inputs and failures due to
3337 * budget exhaustion.
3338 */
3339 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3340 goto retry;
3341 }
3342 c_size = PAGE_SIZE;
3343
3344 if (incomp_copy == FALSE) {
3345 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3346 }
3347
3348 OSAddAtomic(1, &c_segment_noncompressible_pages);
3349
3350 } else if (c_size == 0) {
3351 int hash_index;
3352
3353 /*
3354 * special case - this is a page completely full of a single 32 bit value
3355 */
3356 hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src);
3357
3358 if (hash_index != -1) {
3359 slot_ptr->s_cindx = hash_index;
3360 slot_ptr->s_cseg = C_SV_CSEG_ID;
3361
3362 OSAddAtomic(1, &c_segment_svp_hash_succeeded);
3363#if RECORD_THE_COMPRESSED_DATA
3364 c_compressed_record_data(src, 4);
3365#endif
3366 goto sv_compression;
3367 }
3368 c_size = 4;
3369
3370 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3371
3372 OSAddAtomic(1, &c_segment_svp_hash_failed);
3373 }
3374
3375#if RECORD_THE_COMPRESSED_DATA
3376 c_compressed_record_data((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3377#endif
3378#if CHECKSUM_THE_COMPRESSED_DATA
3379 cs->c_hash_compressed_data = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3380#endif
3381#if POPCOUNT_THE_COMPRESSED_DATA
3382 cs->c_pop_cdata = vmc_pop((uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset], c_size);
3383#endif
3384 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3385
3386 PACK_C_SIZE(cs, c_size);
3387 c_seg->c_bytes_used += c_rounded_size;
3388 c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
3389 c_seg->c_slots_used++;
3390
3391 slot_ptr->s_cindx = c_seg->c_nextslot++;
3392 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
3393 slot_ptr->s_cseg = c_seg->c_mysegno + 1;
3394
3395sv_compression:
3396 if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX_INDEX) {
3397 c_current_seg_filled(c_seg, current_chead);
3398 assert(*current_chead == NULL);
3399 }
3400 lck_mtx_unlock_always(&c_seg->c_lock);
3401
3402 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3403
3404#if RECORD_THE_COMPRESSED_DATA
3405 if ((c_compressed_record_cptr - c_compressed_record_sbuf) >= C_SEG_ALLOCSIZE) {
3406 c_compressed_record_write(c_compressed_record_sbuf, (int)(c_compressed_record_cptr - c_compressed_record_sbuf));
3407 c_compressed_record_cptr = c_compressed_record_sbuf;
3408 }
3409#endif
3410 if (c_size) {
3411 OSAddAtomic64(c_size, &c_segment_compressed_bytes);
3412 OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
3413 }
3414 OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes);
3415
3416 OSAddAtomic(1, &c_segment_pages_compressed);
3417 OSAddAtomic(1, &sample_period_compression_count);
3418
3419 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0);
3420
3421 return (0);
3422}
3423
3424static inline void sv_decompress(int32_t *ddst, int32_t pattern) {
3425#if __x86_64__
3426 memset_word(ddst, pattern, PAGE_SIZE / sizeof(int32_t));
3427#else
3428 size_t i;
3429
3430 /* Unroll the pattern fill loop 4x to encourage the
3431 * compiler to emit NEON stores, cf.
3432 * <rdar://problem/25839866> Loop autovectorization
3433 * anomalies.
3434 * We use separate loops for each PAGE_SIZE
3435 * to allow the autovectorizer to engage, as PAGE_SIZE
3436 * is currently not a constant.
3437 */
3438
3439 __unreachable_ok_push
3440 if (PAGE_SIZE == 4096) {
3441 for (i = 0; i < (4096U / sizeof(int32_t)); i += 4) {
3442 *ddst++ = pattern;
3443 *ddst++ = pattern;
3444 *ddst++ = pattern;
3445 *ddst++ = pattern;
3446 }
3447 } else {
3448 assert(PAGE_SIZE == 16384);
3449 for (i = 0; i < (int)(16384U / sizeof(int32_t)); i += 4) {
3450 *ddst++ = pattern;
3451 *ddst++ = pattern;
3452 *ddst++ = pattern;
3453 *ddst++ = pattern;
3454 }
3455 }
3456 __unreachable_ok_pop
3457#endif
3458}
3459
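/*
 * Decompress the slot described by 'slot_ptr' into 'dst', or just release
 * the slot when 'dst' is NULL.  Honors C_KDP (debugger context, never
 * blocks), C_DONT_BLOCK and C_KEEP.  Returns 0 on success, 1 if the
 * segment had to be swapped in first, -1 if the data was lost (segment on
 * the bad queue), and -2 when the operation would have had to block.
 */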
3460static int
3461c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot)
3462{
3463 c_slot_t cs;
3464 c_segment_t c_seg;
3465 uint32_t c_segno;
3466 int c_indx;
3467 int c_rounded_size;
3468 uint32_t c_size;
3469 int retval = 0;
3470 boolean_t need_unlock = TRUE;
3471 boolean_t consider_defragmenting = FALSE;
3472 boolean_t kdp_mode = FALSE;
3473
3474 if (__improbable(flags & C_KDP)) {
3475 if (not_in_kdp) {
3476 panic("C_KDP passed to decompress page from outside of debugger context");
3477 }
3478
3479 assert((flags & C_KEEP) == C_KEEP);
3480 assert((flags & C_DONT_BLOCK) == C_DONT_BLOCK);
3481
3482 if ((flags & (C_DONT_BLOCK | C_KEEP)) != (C_DONT_BLOCK | C_KEEP)) {
3483 return (-2);
3484 }
3485
3486 kdp_mode = TRUE;
3487 *zeroslot = 0;
3488 }
3489
3490ReTry:
3491 if (__probable(!kdp_mode)) {
3492 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3493 } else {
3494 if (kdp_lck_rw_lock_is_acquired_exclusive(&c_master_lock)) {
3495 return (-2);
3496 }
3497 }
3498
3499#if HIBERNATION
3500 /*
3501 * if hibernation is enabled, it indicates (via a call
 3502 * to 'vm_decompressor_lock') that no further
3503 * decompressions are allowed once it reaches
3504 * the point of flushing all of the currently dirty
3505 * anonymous memory through the compressor and out
3506 * to disk... in this state we allow freeing of compressed
3507 * pages and must honor the C_DONT_BLOCK case
3508 */
3509 if (__improbable(dst && decompressions_blocked == TRUE)) {
3510 if (flags & C_DONT_BLOCK) {
3511
3512 if (__probable(!kdp_mode)) {
3513 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3514 }
3515
3516 *zeroslot = 0;
3517 return (-2);
3518 }
3519 /*
3520 * it's safe to atomically assert and block behind the
3521 * lock held in shared mode because "decompressions_blocked" is
 3522 * only set and cleared, and the thread_wakeup done, while the lock
3523 * is held exclusively
3524 */
3525 assert_wait((event_t)&decompressions_blocked, THREAD_UNINT);
3526
3527 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3528
3529 thread_block(THREAD_CONTINUE_NULL);
3530
3531 goto ReTry;
3532 }
3533#endif
3534 /* s_cseg is actually "segno+1" */
3535 c_segno = slot_ptr->s_cseg - 1;
3536
3537 if (__improbable(c_segno >= c_segments_available))
3538 panic("c_decompress_page: c_segno %d >= c_segments_available %d, slot_ptr(%p), slot_data(%x)",
3539 c_segno, c_segments_available, slot_ptr, *(int *)((void *)slot_ptr));
3540
3541 if (__improbable(c_segments[c_segno].c_segno < c_segments_available))
3542 panic("c_decompress_page: c_segno %d is free, slot_ptr(%p), slot_data(%x)",
3543 c_segno, slot_ptr, *(int *)((void *)slot_ptr));
3544
3545 c_seg = c_segments[c_segno].c_seg;
3546
3547 if (__probable(!kdp_mode)) {
3548 lck_mtx_lock_spin_always(&c_seg->c_lock);
3549 } else {
3550 if (kdp_lck_mtx_lock_spin_is_acquired(&c_seg->c_lock)) {
3551 return (-2);
3552 }
3553 }
3554
3555 assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
3556
3557 if (dst == NULL && c_seg->c_busy_swapping) {
3558 assert(c_seg->c_busy);
3559
3560 goto bypass_busy_check;
3561 }
3562 if (flags & C_DONT_BLOCK) {
3563 if (c_seg->c_busy || (C_SEG_IS_ONDISK(c_seg) && dst)) {
3564 *zeroslot = 0;
3565
3566 retval = -2;
3567 goto done;
3568 }
3569 }
3570 if (c_seg->c_busy) {
3571
3572 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3573
3574 c_seg_wait_on_busy(c_seg);
3575
3576 goto ReTry;
3577 }
3578bypass_busy_check:
3579
3580 c_indx = slot_ptr->s_cindx;
3581
3582 if (__improbable(c_indx >= c_seg->c_nextslot))
3583 panic("c_decompress_page: c_indx %d >= c_nextslot %d, c_seg(%p), slot_ptr(%p), slot_data(%x)",
3584 c_indx, c_seg->c_nextslot, c_seg, slot_ptr, *(int *)((void *)slot_ptr));
3585
3586 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
3587
3588 c_size = UNPACK_C_SIZE(cs);
3589
3590 if (__improbable(c_size == 0))
3591 panic("c_decompress_page: c_size == 0, c_seg(%p), slot_ptr(%p), slot_data(%x)",
3592 c_seg, slot_ptr, *(int *)((void *)slot_ptr));
3593
3594 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3595
3596 if (dst) {
3597 uint32_t age_of_cseg;
3598 clock_sec_t cur_ts_sec;
3599 clock_nsec_t cur_ts_nsec;
3600
3601 if (C_SEG_IS_ONDISK(c_seg)) {
3602 assert(kdp_mode == FALSE);
3603 retval = c_seg_swapin(c_seg, FALSE, TRUE);
3604 assert(retval == 0);
3605
3606 retval = 1;
3607 }
3608 if (c_seg->c_state == C_ON_BAD_Q) {
3609 assert(c_seg->c_store.c_buffer == NULL);
3610 *zeroslot = 0;
3611
3612 retval = -1;
3613 goto done;
3614 }
3615
3616#if POPCOUNT_THE_COMPRESSED_DATA
3617 unsigned csvpop;
3618 uintptr_t csvaddr = (uintptr_t) &c_seg->c_store.c_buffer[cs->c_offset];
3619 if (cs->c_pop_cdata != (csvpop = vmc_pop(csvaddr, c_size))) {
3620 panic("Compressed data popcount doesn't match original, bit distance: %d %p (phys: %p) %p %p 0x%llx 0x%x 0x%x 0x%x", (csvpop - cs->c_pop_cdata), (void *)csvaddr, (void *) kvtophys(csvaddr), c_seg, cs, cs->c_offset, c_size, csvpop, cs->c_pop_cdata);
3621 }
3622#endif
3623
3624#if CHECKSUM_THE_COMPRESSED_DATA
3625 unsigned csvhash;
3626 if (cs->c_hash_compressed_data != (csvhash = vmc_hash((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))) {
3627 panic("Compressed data doesn't match original %p %p %u %u %u", c_seg, cs, c_size, cs->c_hash_compressed_data, csvhash);
3628 }
3629#endif
3630 if (c_rounded_size == PAGE_SIZE) {
3631 /*
3632 * page wasn't compressible... just copy it out
3633 */
3634 memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE);
3635 } else if (c_size == 4) {
3636 int32_t data;
3637 int32_t *dptr;
3638
3639 /*
3640 * page was populated with a single value
3641 * that didn't fit into our fast hash
3642 * so we packed it in as a single non-compressed value
3643 * that we need to populate the page with
3644 */
3645 dptr = (int32_t *)(uintptr_t)dst;
3646 data = *(int32_t *)(&c_seg->c_store.c_buffer[cs->c_offset]);
3647 sv_decompress(dptr, data);
3648 } else {
3649 uint32_t my_cpu_no;
3650 char *scratch_buf;
3651
3652 if (__probable(!kdp_mode)) {
3653 /*
3654 * we're behind the c_seg lock held in spin mode
3655 * which means pre-emption is disabled... therefore
3656 * the following sequence is atomic and safe
3657 */
3658 my_cpu_no = cpu_number();
3659
3660 assert(my_cpu_no < compressor_cpus);
3661
3662 scratch_buf = &compressor_scratch_bufs[my_cpu_no * vm_compressor_get_decode_scratch_size()];
3663 } else {
3664 scratch_buf = kdp_compressor_scratch_buf;
3665 }
3666
3667 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
3668#if defined(__arm__) || defined(__arm64__)
3669 uint16_t c_codec = cs->c_codec;
3670 metadecompressor((const uint8_t *) &c_seg->c_store.c_buffer[cs->c_offset],
3671 (uint8_t *)dst, c_size, c_codec, (void *)scratch_buf);
3672#endif
3673 } else {
3674#if defined(__arm64__)
3675 __unreachable_ok_push
3676 if (PAGE_SIZE == 4096)
3677 WKdm_decompress_4k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3678 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
3679 else {
3680 WKdm_decompress_16k((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3681 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
3682 }
3683 __unreachable_ok_pop
3684#else
3685 WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3686 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
3687#endif
3688 }
3689 }
3690
3691#if CHECKSUM_THE_DATA
3692 if (cs->c_hash_data != vmc_hash(dst, PAGE_SIZE)) {
3693#if defined(__arm__) || defined(__arm64__)
3694 int32_t *dinput = &c_seg->c_store.c_buffer[cs->c_offset];
3695 panic("decompressed data doesn't match original cs: %p, hash: 0x%x, offset: %d, c_size: %d, c_rounded_size: %d, codec: %d, header: 0x%x 0x%x 0x%x", cs, cs->c_hash_data, cs->c_offset, c_size, c_rounded_size, cs->c_codec, *dinput, *(dinput + 1), *(dinput + 2));
3696#else
3697 panic("decompressed data doesn't match original cs: %p, hash: %d, offset: 0x%x, c_size: %d", cs, cs->c_hash_data, cs->c_offset, c_size);
3698#endif
3699 }
3700#endif
3701 if (c_seg->c_swappedin_ts == 0 && !kdp_mode) {
3702
3703 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
3704
3705 age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts;
3706 if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE)
3707 OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]);
3708 else
3709 OSAddAtomic(1, &overage_decompressions_during_sample_period);
3710
3711 OSAddAtomic(1, &sample_period_decompression_count);
3712 }
3713 }
3714 if (flags & C_KEEP) {
3715 *zeroslot = 0;
3716 goto done;
3717 }
3718 assert(kdp_mode == FALSE);
3719
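	/*
	 * the compressed copy is being released (no C_KEEP)... return its
	 * bytes to the segment's free pool, clear the slot and drop the
	 * global count of compressed pages
	 */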
3720 c_seg->c_bytes_unused += c_rounded_size;
3721 c_seg->c_bytes_used -= c_rounded_size;
3722
3723 assert(c_seg->c_slots_used);
3724 c_seg->c_slots_used--;
3725
3726 PACK_C_SIZE(cs, 0);
3727
3728 if (c_indx < c_seg->c_firstemptyslot)
3729 c_seg->c_firstemptyslot = c_indx;
3730
3731 OSAddAtomic(-1, &c_segment_pages_compressed);
3732
3733 if (c_seg->c_state != C_ON_BAD_Q && !(C_SEG_IS_ONDISK(c_seg))) {
3734 /*
3735 * C_SEG_IS_ONDISK == TRUE can occur when we're doing a
3736 * free of a compressed page (i.e. dst == NULL)
3737 */
3738 OSAddAtomic64(-c_rounded_size, &compressor_bytes_used);
3739 }
3740 if (c_seg->c_busy_swapping) {
3741 /*
3742 * bypass case for c_busy_swapping...
3743 * let the swapin/swapout paths deal with putting
3744 * the c_seg on the minor compaction queue if needed
3745 */
3746 assert(c_seg->c_busy);
3747 goto done;
3748 }
3749 assert(!c_seg->c_busy);
3750
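	/*
	 * now that a slot has been freed, decide what to do with the segment:
	 * a fully empty in-core segment gets its buffer pages depopulated and
	 * is queued for delayed minor compaction, an empty on-disk segment
	 * moves to the sparse list, and a partially empty segment is
	 * considered for minor compaction
	 */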
3751 if (c_seg->c_state != C_IS_FILLING) {
3752 if (c_seg->c_bytes_used == 0) {
3753 if ( !(C_SEG_IS_ONDISK(c_seg))) {
3754 int pages_populated;
3755
3756 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
3757 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
3758
3759 if (pages_populated) {
3760
3761 assert(c_seg->c_state != C_ON_BAD_Q);
3762 assert(c_seg->c_store.c_buffer != NULL);
3763
3764 C_SEG_BUSY(c_seg);
3765 lck_mtx_unlock_always(&c_seg->c_lock);
3766
3767 kernel_memory_depopulate(compressor_map, (vm_offset_t) c_seg->c_store.c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);
3768
3769 lck_mtx_lock_spin_always(&c_seg->c_lock);
3770 C_SEG_WAKEUP_DONE(c_seg);
3771 }
3772 if (!c_seg->c_on_minorcompact_q && c_seg->c_state != C_ON_SWAPOUT_Q)
3773 c_seg_need_delayed_compaction(c_seg, FALSE);
3774 } else {
3775 if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q) {
3776
3777 c_seg_move_to_sparse_list(c_seg);
3778 consider_defragmenting = TRUE;
3779 }
3780 }
3781 } else if (c_seg->c_on_minorcompact_q) {
3782
3783 assert(c_seg->c_state != C_ON_BAD_Q);
3784
3785 if (C_SEG_SHOULD_MINORCOMPACT_NOW(c_seg)) {
3786 c_seg_try_minor_compaction_and_unlock(c_seg);
3787 need_unlock = FALSE;
3788 }
3789 } else if ( !(C_SEG_IS_ONDISK(c_seg))) {
3790
3791 if (c_seg->c_state != C_ON_BAD_Q && c_seg->c_state != C_ON_SWAPOUT_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
3792 c_seg_need_delayed_compaction(c_seg, FALSE);
3793 }
3794 } else if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {
3795
3796 c_seg_move_to_sparse_list(c_seg);
3797 consider_defragmenting = TRUE;
3798 }
3799 }
3800done:
3801 if (__improbable(kdp_mode)) {
3802 return retval;
3803 }
3804
3805 if (need_unlock == TRUE)
3806 lck_mtx_unlock_always(&c_seg->c_lock);
3807
3808 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3809
3810 if (consider_defragmenting == TRUE)
3811 vm_swap_consider_defragmenting();
3812
3813#if CONFIG_EMBEDDED
3814 if ((c_minor_count && COMPRESSOR_NEEDS_TO_MINOR_COMPACT()) || vm_compressor_needs_to_major_compact())
3815 vm_wake_compactor_swapper();
3816#endif
3817
3818 return (retval);
3819}
3820
3821
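/*
 * vm_compressor_get:
 * decompress the page backing the slot handle '*slot' into the physical
 * page 'pn'.  Pages that were stored as a single repeated value in the
 * C_SV hash table are reconstructed here without touching a c_segment;
 * everything else goes through c_decompress_page().  Unless C_KEEP is
 * passed, a successful get also releases the compressed copy and zeroes
 * the slot handle.
 */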
3822int
3823vm_compressor_get(ppnum_t pn, int *slot, int flags)
3824{
3825 c_slot_mapping_t slot_ptr;
3826 char *dst;
3827 int zeroslot = 1;
3828 int retval;
3829
3830#if __x86_64__
3831 dst = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
3832#elif __arm__ || __arm64__
3833 dst = (char *) phystokv((pmap_paddr_t)pn << PAGE_SHIFT);
3834#else
3835#error "unsupported architecture"
3836#endif
3837 slot_ptr = (c_slot_mapping_t)slot;
3838
3839 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
3840 int32_t data;
3841 int32_t *dptr;
3842
3843 /*
3844 * page was populated with a single value
3845 * that found a home in our hash table...
3846 * grab that value from the hash entry and
3847 * replicate it across the destination page
3848 */
3849 dptr = (int32_t *)(uintptr_t)dst;
3850 data = c_segment_sv_hash_table[slot_ptr->s_cindx].he_data;
3851#if __x86_64__
3852 memset_word(dptr, data, PAGE_SIZE / sizeof(int32_t));
3853#else
3854 {
3855 int i;
3856
3857 for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
3858 *dptr++ = data;
3859 }
3860#endif
3861 if ( !(flags & C_KEEP)) {
3862 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
3863
3864 OSAddAtomic(-1, &c_segment_pages_compressed);
3865 *slot = 0;
3866 }
3867 if (data)
3868 OSAddAtomic(1, &c_segment_svp_nonzero_decompressions);
3869 else
3870 OSAddAtomic(1, &c_segment_svp_zero_decompressions);
3871
3872 return (0);
3873 }
3874
3875 retval = c_decompress_page(dst, slot_ptr, flags, &zeroslot);
3876
3877 /*
3878 * zeroslot is cleared by c_decompress_page if (flags & C_KEEP) was set, if (flags & C_DONT_BLOCK)
3879 * was set and the segment was 'c_busy' or 'C_SEG_IS_ONDISK', or if the segment was on the bad queue
3880 */
3881 if (zeroslot) {
3882 *slot = 0;
3883 }
3884 /*
3885 * returns 0 if we successfully decompressed a page from a segment already in memory
3886 * returns 1 if we had to first swap in the segment, before successfully decompressing the page
3887 * returns -1 if we encountered an error swapping in the segment - decompression failed
3888 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be true
3889 */
3890 return (retval);
3891}
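/*
 * Illustrative sketch (not part of the original source): one way a caller
 * might interpret vm_compressor_get()'s return value.  'pn' and 'slotp'
 * are hypothetical names... the real callers live in the fault/pageout paths.
 *
 *	switch (vm_compressor_get(pn, slotp, C_DONT_BLOCK)) {
 *	case 0:		// decompressed from a segment already in memory
 *	case 1:		// decompressed after the segment was swapped in
 *		break;	// *slotp was zeroed since C_KEEP wasn't passed
 *	case -2:	// C_DONT_BLOCK and the segment was busy or on disk
 *		break;	// retry later, possibly without C_DONT_BLOCK
 *	case -1:	// swap-in of the segment failed
 *		break;
 *	}
 */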
3892
3893
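/*
 * vm_compressor_free:
 * release the compressed copy backing '*slot' without decompressing it.
 * Single-value (C_SV_CSEG_ID) slots just drop their hash reference...
 * everything else goes through c_decompress_page() with a NULL
 * destination.  With C_DONT_BLOCK the free can return -2 if the
 * segment is busy, in which case the slot is left intact.
 */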
3894int
3895vm_compressor_free(int *slot, int flags)
3896{
3897 c_slot_mapping_t slot_ptr;
3898 int zeroslot = 1;
3899 int retval;
3900
3901 assert(flags == 0 || flags == C_DONT_BLOCK);
3902
3903 slot_ptr = (c_slot_mapping_t)slot;
3904
3905 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
3906
3907 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
3908 OSAddAtomic(-1, &c_segment_pages_compressed);
3909
3910 *slot = 0;
3911 return (0);
3912 }
3913 retval = c_decompress_page(NULL, slot_ptr, flags, &zeroslot);
3914 /*
3915 * returns 0 if we successfully freed the specified compressed page
3916 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set
3917 */
3918
3919 if (retval == 0)
3920 *slot = 0;
3921 else
3922 assert(retval == -2);
3923
3924 return (retval);
3925}
3926
3927
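/*
 * vm_compressor_put:
 * compress the page at physical page 'pn' into the segment currently
 * being filled for '*current_chead', storing the packed <c_seg, slot>
 * handle through 'slot' so it can later be handed to vm_compressor_get()
 * or vm_compressor_free().
 */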
3928int
3929vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf)
3930{
3931 char *src;
3932 int retval;
3933
3934#if __x86_64__
3935 src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
3936#elif __arm__ || __arm64__
3937 src = (char *) phystokv((pmap_paddr_t)pn << PAGE_SHIFT);
3938#else
3939#error "unsupported architecture"
3940#endif
3941
3942 retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf);
3943
3944 return (retval);
3945}
3946
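/*
 * vm_compressor_transfer:
 * move a compressed-slot handle from '*src_slot_p' to '*dst_slot_p'.
 * For slots that live in a c_segment, the c_slot's packed back-pointer is
 * re-aimed at the destination handle so that later compaction or swap
 * activity updates the correct mapping... single-value (C_SV_CSEG_ID)
 * slots only need the handle itself copied.
 */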
3947void
3948vm_compressor_transfer(
3949 int *dst_slot_p,
3950 int *src_slot_p)
3951{
3952 c_slot_mapping_t dst_slot, src_slot;
3953 c_segment_t c_seg;
3954 int c_indx;
3955 c_slot_t cs;
3956
3957 src_slot = (c_slot_mapping_t) src_slot_p;
3958
3959 if (src_slot->s_cseg == C_SV_CSEG_ID) {
3960 *dst_slot_p = *src_slot_p;
3961 *src_slot_p = 0;
3962 return;
3963 }
3964 dst_slot = (c_slot_mapping_t) dst_slot_p;
3965Retry:
3966 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3967 /* get segment for src_slot */
3968 c_seg = c_segments[src_slot->s_cseg -1].c_seg;
3969 /* lock segment */
3970 lck_mtx_lock_spin_always(&c_seg->c_lock);
3971 /* wait if it's busy */
3972 if (c_seg->c_busy && !c_seg->c_busy_swapping) {
3973 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3974 c_seg_wait_on_busy(c_seg);
3975 goto Retry;
3976 }
3977 /* find the c_slot */
3978 c_indx = src_slot->s_cindx;
3979 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
3980 /* point the c_slot back to dst_slot instead of src_slot */
3981 cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot);
3982 /* transfer */
3983 *dst_slot_p = *src_slot_p;
3984 *src_slot_p = 0;
3985 lck_mtx_unlock_always(&c_seg->c_lock);
3986 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3987}
3988
3989#if CONFIG_FREEZE
3990
3991int freezer_finished_filling = 0;
3992
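/*
 * vm_compressor_finished_filling:
 * called when the freezer has finished packing a task's pages... takes
 * the c_seg currently being filled for '*current_chead' (if any) out of
 * the C_IS_FILLING state via c_current_seg_filled().
 */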
3993void
3994vm_compressor_finished_filling(
3995 void **current_chead)
3996{
3997 c_segment_t c_seg;
3998
3999 if ((c_seg = *(c_segment_t *)current_chead) == NULL)
4000 return;
4001
4002 assert(c_seg->c_state == C_IS_FILLING);
4003
4004 lck_mtx_lock_spin_always(&c_seg->c_lock);
4005
4006 c_current_seg_filled(c_seg, (c_segment_t *)current_chead);
4007
4008 lck_mtx_unlock_always(&c_seg->c_lock);
4009
4010 freezer_finished_filling++;
4011}
4012
4013
4014/*
4015 * This routine is used to transfer the compressed chunks from
4016 * the c_seg/cindx pointed to by slot_p into a new c_seg headed
4017 * by the current_chead and a new cindx within that c_seg.
4018 *
4019 * Currently, this routine is only used by the "freezer backed by
4020 * compressor with swap" mode to create a series of c_segs that
4021 * only contain compressed data belonging to one task. So, we
4022 * move a task's previously compressed data into a set of new
4023 * c_segs which will also hold the task's yet to be compressed data.
4024 */
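/*
 * Illustrative sketch (not part of the original source): the shape of a
 * freezer relocation pass over one task's slots.  'chead', 'slots' and
 * 'nslots' are hypothetical names standing in for the freezer's own state.
 *
 *	void *chead = NULL;
 *	int i;
 *
 *	for (i = 0; i < nslots; i++) {
 *		if (vm_compressor_relocate(&chead, &slots[i]) == KERN_RESOURCE_SHORTAGE)
 *			break;		// out of compressor segments
 *	}
 *	vm_compressor_finished_filling(&chead);	// close out the last partially filled c_seg
 */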
4025
4026kern_return_t
4027vm_compressor_relocate(
4028 void **current_chead,
4029 int *slot_p)
4030{
4031 c_slot_mapping_t slot_ptr;
4032 c_slot_mapping_t src_slot;
4033 uint32_t c_rounded_size;
4034 uint32_t c_size;
4035 uint16_t dst_slot;
4036 c_slot_t c_dst;
4037 c_slot_t c_src;
4038 int c_indx;
4039 c_segment_t c_seg_dst = NULL;
4040 c_segment_t c_seg_src = NULL;
4041 kern_return_t kr = KERN_SUCCESS;
4042
4043
4044 src_slot = (c_slot_mapping_t) slot_p;
4045
4046 if (src_slot->s_cseg == C_SV_CSEG_ID) {
4047 /*
4048 * no need to relocate... this is a page full of a single
4049 * value which is hashed to a single entry not contained
4050 * in a c_segment_t
4051 */
4052 return (kr);
4053 }
4054
4055Relookup_dst:
4056 c_seg_dst = c_seg_allocate((c_segment_t *)current_chead);
4057 /*
4058 * returns with c_seg lock held
4059 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
4060 * c_nextslot has been allocated and
4061 * c_store.c_buffer populated
4062 */
4063 if (c_seg_dst == NULL) {
4064 /*
4065 * Out of compression segments?
4066 */
4067 kr = KERN_RESOURCE_SHORTAGE;
4068 goto out;
4069 }
4070
4071 assert(c_seg_dst->c_busy == 0);
4072
4073 C_SEG_BUSY(c_seg_dst);
4074
4075 dst_slot = c_seg_dst->c_nextslot;
4076
4077 lck_mtx_unlock_always(&c_seg_dst->c_lock);
4078
4079Relookup_src:
4080 c_seg_src = c_segments[src_slot->s_cseg - 1].c_seg;
4081
4082 assert(c_seg_dst != c_seg_src);
4083
4084 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
4085
4086 if (C_SEG_IS_ONDISK(c_seg_src)) {
4087
4088 /*
4089 * A "thaw" can mark a process as eligible for
4090 * another freeze cycle without bringing any of
4091 * its swapped out c_segs back from disk (because
4092 * that is done on-demand).
4093 *
4094 * If the src c_seg we find for our pre-compressed
4095 * data is already on-disk, then we are dealing
4096 * with an app's data that is already packed and
4097 * swapped out. Don't do anything.
4098 */
4099
4100 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4101
4102 lck_mtx_unlock_always(&c_seg_src->c_lock);
4103
4104 c_seg_src = NULL;
4105
4106 goto out;
4107 }
4108
4109 if (c_seg_src->c_busy) {
4110
4111 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4112 c_seg_wait_on_busy(c_seg_src);
4113
4114 c_seg_src = NULL;
4115
4116 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4117
4118 goto Relookup_src;
4119 }
4120
4121 C_SEG_BUSY(c_seg_src);
4122
4123 lck_mtx_unlock_always(&c_seg_src->c_lock);
4124
4125 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4126
4127 /* find the c_slot */
4128 c_indx = src_slot->s_cindx;
4129
4130 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, c_indx);
4131
4132 c_size = UNPACK_C_SIZE(c_src);
4133
4134 assert(c_size);
4135
4136 if (c_size > (uint32_t)(C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)c_seg_dst->c_nextoffset))) {
4137 /*
4138 * This segment is full. We need a new one.
4139 */
4140
4141 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4142
4143 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
4144 C_SEG_WAKEUP_DONE(c_seg_src);
4145 lck_mtx_unlock_always(&c_seg_src->c_lock);
4146
4147 c_seg_src = NULL;
4148
4149 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
4150
4151 assert(c_seg_dst->c_busy);
4152 assert(c_seg_dst->c_state == C_IS_FILLING);
4153 assert(!c_seg_dst->c_on_minorcompact_q);
4154
4155 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
4156 assert(*current_chead == NULL);
4157
4158 C_SEG_WAKEUP_DONE(c_seg_dst);
4159
4160 lck_mtx_unlock_always(&c_seg_dst->c_lock);
4161
4162 c_seg_dst = NULL;
4163
4164 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4165
4166 goto Relookup_dst;
4167 }
4168
4169 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
4170
4171 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
4172// XXX: is platform alignment actually necessary since WKdm aligns its output?
4173 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
4174
4175 cslot_copy(c_dst, c_src);
4176 c_dst->c_offset = c_seg_dst->c_nextoffset;
4177
4178 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
4179 c_seg_dst->c_firstemptyslot++;
4180
4181 c_seg_dst->c_slots_used++;
4182 c_seg_dst->c_nextslot++;
4183 c_seg_dst->c_bytes_used += c_rounded_size;
4184 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
4185
4186
4187 PACK_C_SIZE(c_src, 0);
4188
4189 c_seg_src->c_bytes_used -= c_rounded_size;
4190 c_seg_src->c_bytes_unused += c_rounded_size;
4191
4192 assert(c_seg_src->c_slots_used);
4193 c_seg_src->c_slots_used--;
4194
4195 if (c_indx < c_seg_src->c_firstemptyslot) {
4196 c_seg_src->c_firstemptyslot = c_indx;
4197 }
4198
4199 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
4200
4201 PAGE_REPLACEMENT_ALLOWED(TRUE);
4202 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
4203 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
4204 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
4205 slot_ptr->s_cindx = dst_slot;
4206
4207 PAGE_REPLACEMENT_ALLOWED(FALSE);
4208
4209out:
4210 if (c_seg_src) {
4211
4212 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
4213
4214 C_SEG_WAKEUP_DONE(c_seg_src);
4215
4216 if (c_seg_src->c_bytes_used == 0 && c_seg_src->c_state != C_IS_FILLING) {
4217 if (!c_seg_src->c_on_minorcompact_q)
4218 c_seg_need_delayed_compaction(c_seg_src, FALSE);
4219 }
4220
4221 lck_mtx_unlock_always(&c_seg_src->c_lock);
4222 }
4223
4224 if (c_seg_dst) {
4225
4226 PAGE_REPLACEMENT_DISALLOWED(TRUE);
4227
4228 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
4229
4230 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
4231 /*
4232 * nearing or past this segment's maximum slot or offset capacity... close it out now
4233 */
4234 assert(c_seg_dst->c_busy);
4235 assert(c_seg_dst->c_state == C_IS_FILLING);
4236 assert(!c_seg_dst->c_on_minorcompact_q);
4237
4238 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
4239 assert(*current_chead == NULL);
4240 }
4241
4242 C_SEG_WAKEUP_DONE(c_seg_dst);
4243
4244 lck_mtx_unlock_always(&c_seg_dst->c_lock);
4245
4246 c_seg_dst = NULL;
4247
4248 PAGE_REPLACEMENT_DISALLOWED(FALSE);
4249 }
4250
4251 return kr;
4252}
4253#endif /* CONFIG_FREEZE */