/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/vm_compressor.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <mach/mach_host.h>		/* for host_info() */
#include <kern/ledger.h>

#include <default_pager/default_pager_alerts.h>
#include <default_pager/default_pager_object_server.h>

#include <IOKit/IOHibernatePrivate.h>
/*
 * vm_compressor_mode has a hierarchy of control to set its value.
 * boot-args are checked first, then device-tree, and finally
 * the default value that is defined below. See vm_fault_init() for
 * the boot-arg & device-tree code.
 */
extern ipc_port_t min_pages_trigger_port;
extern lck_mtx_t paging_segments_lock;
#define PSL_LOCK()	lck_mtx_lock(&paging_segments_lock)
#define PSL_UNLOCK()	lck_mtx_unlock(&paging_segments_lock)

int		vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;
int		vm_compression_limit = 0;

extern boolean_t vm_swap_up;
extern void	vm_pageout_io_throttle(void);
#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA
extern unsigned int hash_string(char *cp, int len);
#endif

struct c_slot {
	uint64_t	c_offset:C_SEG_OFFSET_BITS,
			c_size:12,
			c_packed_ptr:36;
#if CHECKSUM_THE_DATA
	unsigned int	c_hash_data;
#endif
#if CHECKSUM_THE_COMPRESSED_DATA
	unsigned int	c_hash_compressed_data;
#endif
};
#define UNPACK_C_SIZE(cs)	((cs->c_size == (PAGE_SIZE-1)) ? 4096 : cs->c_size)
#define PACK_C_SIZE(cs, size)	(cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
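/*
 * A compressed size equal to PAGE_SIZE does not fit in the slot's c_size
 * bit-field, so PACK_C_SIZE records it as PAGE_SIZE - 1 and UNPACK_C_SIZE
 * maps that sentinel back to a full 4096-byte page.
 */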
struct c_slot_mapping {
	uint32_t	s_cseg:22,	/* segment number + 1 */
			s_cindx:10;	/* index in the segment */
};

typedef struct c_slot_mapping *c_slot_mapping_t;

#define C_SLOT_PACK_PTR(ptr)		(((uintptr_t)ptr - (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS) >> 2)
#define C_SLOT_UNPACK_PTR(cslot)	((uintptr_t)(cslot->c_packed_ptr << 2) + (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS)
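/*
 * The back-pointer from a c_slot to its c_slot_mapping is stored as an
 * offset from VM_MIN_KERNEL_AND_KEXT_ADDRESS shifted right by 2, which
 * relies on the mapping being at least 4-byte aligned.
 */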
uint32_t	c_segment_count = 0;

uint64_t	c_generation_id = 0;
uint64_t	c_generation_id_flush_barrier;

#define		HIBERNATE_FLUSHING_SECS_TO_COMPLETE	120

boolean_t	hibernate_no_swapspace = FALSE;
clock_sec_t	hibernate_flushing_deadline = 0;
#if TRACK_BAD_C_SEGMENTS
queue_head_t	c_bad_list_head;
uint32_t	c_bad_count = 0;
#endif

queue_head_t	c_age_list_head;
queue_head_t	c_swapout_list_head;
queue_head_t	c_swappedin_list_head;
queue_head_t	c_swappedout_list_head;
queue_head_t	c_swappedout_sparse_list_head;

uint32_t	c_age_count = 0;
uint32_t	c_swapout_count = 0;
uint32_t	c_swappedin_count = 0;
uint32_t	c_swappedout_count = 0;
uint32_t	c_swappedout_sparse_count = 0;

queue_head_t	c_minor_list_head;
uint32_t	c_minor_count = 0;

union c_segu	*c_segments;
caddr_t		c_segments_next_page;
boolean_t	c_segments_busy;
uint32_t	c_segments_available;
uint32_t	c_segments_limit;
uint32_t	c_segment_pages_compressed;
uint32_t	c_segment_pages_compressed_limit;
uint32_t	c_free_segno_head = (uint32_t)-1;

uint32_t	vm_compressor_minorcompact_threshold_divisor = 10;
uint32_t	vm_compressor_majorcompact_threshold_divisor = 10;
uint32_t	vm_compressor_unthrottle_threshold_divisor = 10;
uint32_t	vm_compressor_catchup_threshold_divisor = 10;
#define		C_SEGMENTS_PER_PAGE	(PAGE_SIZE / sizeof(union c_segu))

lck_grp_attr_t	vm_compressor_lck_grp_attr;
lck_attr_t	vm_compressor_lck_attr;
lck_grp_t	vm_compressor_lck_grp;
#if __i386__ || __x86_64__
lck_mtx_t	*c_list_lock;
#else /* __i386__ || __x86_64__ */
lck_spin_t	*c_list_lock;
#endif /* __i386__ || __x86_64__ */

lck_rw_t	c_master_lock;
lck_rw_t	c_decompressor_lock;

zone_t		compressor_segment_zone;
int		c_compressor_swap_trigger = 0;

uint32_t	compressor_cpus;
char		*compressor_scratch_bufs;
clock_sec_t	start_of_sample_period_sec = 0;
clock_nsec_t	start_of_sample_period_nsec = 0;
clock_sec_t	start_of_eval_period_sec = 0;
clock_nsec_t	start_of_eval_period_nsec = 0;
uint32_t	sample_period_decompression_count = 0;
uint32_t	sample_period_compression_count = 0;
uint32_t	last_eval_decompression_count = 0;
uint32_t	last_eval_compression_count = 0;

#define		DECOMPRESSION_SAMPLE_MAX_AGE	(60 * 30)

uint32_t	swapout_target_age = 0;
uint32_t	age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE];
uint32_t	overage_decompressions_during_sample_period = 0;
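/*
 * Decompression ages observed during the current sample period are
 * bucketed per second of segment age, up to DECOMPRESSION_SAMPLE_MAX_AGE
 * (30 minutes); older decompressions are counted in
 * overage_decompressions_during_sample_period.
 */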
void		do_fastwake_warmup(void);
boolean_t	fastwake_warmup = FALSE;
boolean_t	fastwake_recording_in_progress = FALSE;
clock_sec_t	dont_trim_until_ts = 0;

uint64_t	c_segment_warmup_count;
uint64_t	first_c_segment_to_warm_generation_id = 0;
uint64_t	last_c_segment_to_warm_generation_id = 0;
boolean_t	hibernate_flushing = FALSE;

int64_t		c_segment_input_bytes = 0;
int64_t		c_segment_compressed_bytes = 0;
int64_t		compressor_bytes_used = 0;
static boolean_t compressor_needs_to_swap(void);
static void vm_compressor_swap_trigger_thread(void);
static void vm_compressor_do_delayed_compactions(boolean_t);
static void vm_compressor_compact_and_swap(boolean_t);
static void vm_compressor_age_swapped_in_segments(boolean_t);
static uint64_t compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t);

boolean_t vm_compressor_low_on_space(void);

void compute_swapout_target_age(void);

boolean_t c_seg_major_compact(c_segment_t, c_segment_t);
boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t);

int  c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t);
int  c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t);
void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg);
void c_seg_need_delayed_compaction(c_segment_t);

void c_seg_move_to_sparse_list(c_segment_t);
void c_seg_insert_into_q(queue_head_t *, c_segment_t);

boolean_t c_seg_try_free(c_segment_t);
void	  c_seg_free(c_segment_t);
void	  c_seg_free_locked(c_segment_t);

uint64_t vm_available_memory(void);

extern unsigned int dp_pages_free, dp_pages_reserve;
uint64_t
vm_available_memory(void)
{
	return (((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64);
}


boolean_t
vm_compression_available(void)
{
	if ( !(COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE))
		return (FALSE);

	if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit)
		return (FALSE);

	return (TRUE);
}


boolean_t
vm_compressor_low_on_space(void)
{
	if ((c_segment_pages_compressed > (c_segment_pages_compressed_limit - 20000)) ||
	    (c_segment_count > (c_segments_limit - 250)))
		return (TRUE);

	return (FALSE);
}


int
vm_low_on_space(void)
{
	if (vm_compressor_mode == COMPRESSED_PAGER_IS_ACTIVE || vm_compressor_mode == DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
		if (vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED())
			return (1);
	} else {
		if (((dp_pages_free + dp_pages_reserve < 2000) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)))
			return (1);
	}
	return (0);
}
void
vm_compressor_init_locks(void)
{
	lck_grp_attr_setdefault(&vm_compressor_lck_grp_attr);
	lck_grp_init(&vm_compressor_lck_grp, "vm_compressor", &vm_compressor_lck_grp_attr);
	lck_attr_setdefault(&vm_compressor_lck_attr);

	lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
	lck_rw_init(&c_decompressor_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
}


void
vm_decompressor_lock(void)
{
	lck_rw_lock_exclusive(&c_decompressor_lock);
}

void
vm_decompressor_unlock(void)
{
	lck_rw_done(&c_decompressor_lock);
}
void
vm_compressor_init(void)
{
	thread_t	thread;

	assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE);

	PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof (vm_compression_limit));

	if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) {
		vm_compressor_minorcompact_threshold_divisor = 11;
		vm_compressor_majorcompact_threshold_divisor = 13;
		vm_compressor_unthrottle_threshold_divisor = 20;
		vm_compressor_catchup_threshold_divisor = 35;
	} else {
		vm_compressor_minorcompact_threshold_divisor = 20;
		vm_compressor_majorcompact_threshold_divisor = 25;
		vm_compressor_unthrottle_threshold_divisor = 35;
		vm_compressor_catchup_threshold_divisor = 50;
	}
	/*
	 * vm_page_init_lck_grp is now responsible for calling vm_compressor_init_locks
	 * c_master_lock needs to be available early so that "vm_page_find_contiguous" can
	 * use PAGE_REPLACEMENT_ALLOWED to coordinate with the compressor.
	 */

#if __i386__ || __x86_64__
	c_list_lock = lck_mtx_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr);
#else /* __i386__ || __x86_64__ */
	c_list_lock = lck_spin_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr);
#endif /* __i386__ || __x86_64__ */

#if TRACK_BAD_C_SEGMENTS
	queue_init(&c_bad_list_head);
#endif
	queue_init(&c_age_list_head);
	queue_init(&c_minor_list_head);
	queue_init(&c_swapout_list_head);
	queue_init(&c_swappedin_list_head);
	queue_init(&c_swappedout_list_head);
	queue_init(&c_swappedout_sparse_list_head);

	compressor_segment_zone = zinit(sizeof (struct c_segment),
					128000 * sizeof (struct c_segment),
					8192, "compressor_segment");
	zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE);
	zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE);

	c_free_segno_head = -1;
	c_segments_available = 0;

	if (vm_compression_limit == 0) {
		c_segment_pages_compressed_limit = (uint32_t)((max_mem / PAGE_SIZE)) * vm_scale;

#define	OLD_SWAP_LIMIT	(1024 * 1024 * 16)
#define	MAX_SWAP_LIMIT	(1024 * 1024 * 128)

		if (c_segment_pages_compressed_limit > (OLD_SWAP_LIMIT))
			c_segment_pages_compressed_limit = OLD_SWAP_LIMIT;

		if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE_64))
			c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE_64);
	} else {
		if (vm_compression_limit < MAX_SWAP_LIMIT)
			c_segment_pages_compressed_limit = vm_compression_limit;
		else
			c_segment_pages_compressed_limit = MAX_SWAP_LIMIT;
	}
	if ((c_segments_limit = c_segment_pages_compressed_limit / (C_SEG_BUFSIZE / PAGE_SIZE)) > C_SEG_MAX_LIMIT)
		c_segments_limit = C_SEG_MAX_LIMIT;

	c_segments_busy = FALSE;

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY) != KERN_SUCCESS)
		panic("vm_compressor_init: kernel_memory_allocate failed\n");

	c_segments_next_page = (caddr_t)c_segments;

	{
		host_basic_info_data_t hinfo;
		mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;

		host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);

		compressor_cpus = hinfo.max_cpus;

		compressor_scratch_bufs = kalloc(compressor_cpus * WKdm_SCRATCH_BUF_SIZE);
	}

	if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_compressor_swap_trigger_thread: create failed");
	}
	thread->options |= TH_OPT_VMPRIV;

	thread_deallocate(thread);

	assert(default_pager_init_flag == 0);

	if (vm_pageout_internal_start() != KERN_SUCCESS) {
		panic("vm_compressor_init: Failed to start the internal pageout thread.\n");
	}

#if CONFIG_FREEZE
	memorystatus_freeze_enabled = TRUE;
#endif /* CONFIG_FREEZE */

	default_pager_init_flag = 1;

	vm_page_reactivate_all_throttled();
}
#if VALIDATE_C_SEGMENTS

static void
c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact)
{
	int		c_indx;
	int32_t		bytes_used;
	int32_t		bytes_unused;
	uint32_t	c_rounded_size;
	uint32_t	c_size;
	c_slot_t	cs;

	if (c_seg->c_firstemptyslot < c_seg->c_nextslot) {
		c_indx = c_seg->c_firstemptyslot;
		cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

		if (cs == NULL)
			panic("c_seg_validate: no slot backing c_firstemptyslot");

		if (cs->c_size)
			panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size);
	}
	bytes_used = 0;
	bytes_unused = 0;

	for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) {

		cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

		c_size = UNPACK_C_SIZE(cs);

		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

		bytes_used += c_rounded_size;

#if CHECKSUM_THE_COMPRESSED_DATA
		if (c_size && cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))
			panic("compressed data doesn't match original");
#endif
	}

	if (bytes_used != c_seg->c_bytes_used)
		panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used);

	if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
		panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
		      (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);

	if (must_be_compact) {
		if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
			panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
			      (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
	}
}

#endif /* VALIDATE_C_SEGMENTS */
void
c_seg_need_delayed_compaction(c_segment_t c_seg)
{
	boolean_t	clear_busy = FALSE;

	if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
		c_seg->c_busy = 1;

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		clear_busy = TRUE;
	}
	if (!c_seg->c_on_minorcompact_q && !c_seg->c_ondisk && !c_seg->c_on_swapout_q) {
		queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list);
		c_seg->c_on_minorcompact_q = 1;
		c_minor_count++;
	}
	lck_mtx_unlock_always(c_list_lock);

	if (clear_busy == TRUE)
		C_SEG_WAKEUP_DONE(c_seg);
}
unsigned int c_seg_moved_to_sparse_list = 0;

void
c_seg_move_to_sparse_list(c_segment_t c_seg)
{
	boolean_t	clear_busy = FALSE;

	if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
		c_seg->c_busy = 1;

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		clear_busy = TRUE;
	}
	assert(c_seg->c_ondisk);
	assert(c_seg->c_on_swappedout_q);
	assert(!c_seg->c_on_swappedout_sparse_q);

	queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
	c_seg->c_on_swappedout_q = 0;
	c_swappedout_count--;

	c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
	c_seg->c_on_swappedout_sparse_q = 1;
	c_swappedout_sparse_count++;

	c_seg_moved_to_sparse_list++;

	lck_mtx_unlock_always(c_list_lock);

	if (clear_busy == TRUE)
		C_SEG_WAKEUP_DONE(c_seg);
}
void
c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg)
{
	c_segment_t c_seg_next;

	if (queue_empty(qhead)) {
		queue_enter(qhead, c_seg, c_segment_t, c_age_list);
	} else {
		c_seg_next = (c_segment_t)queue_first(qhead);

		while (TRUE) {

			if (c_seg->c_generation_id < c_seg_next->c_generation_id) {
				queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list);
				break;
			}
			c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list);

			if (queue_end(qhead, (queue_entry_t) c_seg_next)) {
				queue_enter(qhead, c_seg, c_segment_t, c_age_list);
				break;
			}
		}
	}
}
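/*
 * Queues ordered by c_seg_insert_into_q are kept sorted by ascending
 * c_generation_id: the walk starts at the head and inserts the segment
 * in front of the first entry with a larger generation id, falling back
 * to the tail if none is found.
 */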
int	try_minor_compaction_failed = 0;
int	try_minor_compaction_succeeded = 0;

void
c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg)
{
	assert(c_seg->c_on_minorcompact_q);
	/*
	 * c_seg is currently on the delayed minor compaction
	 * queue and we have c_seg locked... if we can get the
	 * c_list_lock w/o blocking (if we blocked we could deadlock
	 * because the lock order is c_list_lock then c_seg's lock)
	 * we'll pull it from the delayed list and free it directly
	 */
	if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
		/*
		 * c_list_lock is held, we need to bail
		 */
		try_minor_compaction_failed++;

		lck_mtx_unlock_always(&c_seg->c_lock);
	} else {
		try_minor_compaction_succeeded++;

		c_seg->c_busy = 1;
		c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE);
	}
}
int
c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement)
{
	int	c_seg_freed;

	assert(c_seg->c_busy);

	if (!c_seg->c_on_minorcompact_q) {
		if (clear_busy == TRUE)
			C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);

		return (0);
	}
	queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
	c_seg->c_on_minorcompact_q = 0;
	c_minor_count--;

	lck_mtx_unlock_always(c_list_lock);

	if (disallow_page_replacement == TRUE) {
		lck_mtx_unlock_always(&c_seg->c_lock);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(&c_seg->c_lock);
	}
	c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy);

	if (disallow_page_replacement == TRUE)
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

	if (need_list_lock == TRUE)
		lck_mtx_lock_spin_always(c_list_lock);

	return (c_seg_freed);
}
void
c_seg_wait_on_busy(c_segment_t c_seg)
{
	c_seg->c_wanted = 1;
	assert_wait((event_t) (c_seg), THREAD_UNINT);

	lck_mtx_unlock_always(&c_seg->c_lock);
	thread_block(THREAD_CONTINUE_NULL);
}


int	try_free_succeeded = 0;
int	try_free_failed = 0;
boolean_t
c_seg_try_free(c_segment_t c_seg)
{
	/*
	 * c_seg is currently on the delayed minor compaction
	 * or the swapped out sparse queue and we have c_seg locked...
	 * if we can get the c_list_lock w/o blocking (if we blocked we
	 * could deadlock because the lock order is c_list_lock then c_seg's lock)
	 * we'll pull it from the appropriate queue and free it
	 */
	if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
		/*
		 * c_list_lock is held, we need to bail
		 */
		try_free_failed++;
		return (FALSE);
	}
	if (c_seg->c_on_minorcompact_q) {
		queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
		c_seg->c_on_minorcompact_q = 0;
		c_minor_count--;
	} else {
		assert(c_seg->c_on_swappedout_sparse_q);

		/*
		 * c_seg_free_locked will remove it from the swappedout sparse list
		 */
	}
	if (!c_seg->c_busy_swapping)
		c_seg->c_busy = 1;

	c_seg_free_locked(c_seg);

	try_free_succeeded++;

	return (TRUE);
}
void
c_seg_free(c_segment_t c_seg)
{
	if (!c_seg->c_busy_swapping)
		c_seg->c_busy = 1;

	lck_mtx_unlock_always(&c_seg->c_lock);
	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	c_seg_free_locked(c_seg);
}
void
c_seg_free_locked(c_segment_t c_seg)
{
	int		segno, i;
	int		pages_populated = 0;
	int32_t		*c_buffer = NULL;
	uint64_t	c_swap_handle = 0;

	assert(!c_seg->c_on_minorcompact_q);

	if (c_seg->c_on_age_q) {
		queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_age_q = 0;
		c_age_count--;
	} else if (c_seg->c_on_swappedin_q) {
		queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swappedin_q = 0;
		c_swappedin_count--;
	} else if (c_seg->c_on_swapout_q) {
		queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swapout_q = 0;
		c_swapout_count--;

		thread_wakeup((event_t)&compaction_swapper_running);
	} else if (c_seg->c_on_swappedout_q) {
		queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swappedout_q = 0;
		c_swappedout_count--;
	} else if (c_seg->c_on_swappedout_sparse_q) {
		queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swappedout_sparse_q = 0;
		c_swappedout_sparse_count--;
	}
#if TRACK_BAD_C_SEGMENTS
	else if (c_seg->c_on_bad_q) {
		queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_bad_q = 0;
		c_bad_count--;
	}
#endif
	segno = c_seg->c_mysegno;
	c_segments[segno].c_segno = c_free_segno_head;
	c_free_segno_head = segno;
	c_segment_count--;

	lck_mtx_unlock_always(c_list_lock);

	if (c_seg->c_wanted) {
		thread_wakeup((event_t) (c_seg));
		c_seg->c_wanted = 0;
	}
	if (c_seg->c_busy_swapping) {
		c_seg->c_must_free = 1;

		lck_mtx_unlock_always(&c_seg->c_lock);
		return;
	}
	if (c_seg->c_ondisk == 0) {
		pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;

		c_buffer = c_seg->c_store.c_buffer;
		c_seg->c_store.c_buffer = NULL;
	} else {
		/*
		 * Free swap space on disk.
		 */
		c_swap_handle = c_seg->c_store.c_swap_handle;
		c_seg->c_store.c_swap_handle = (uint64_t)-1;
	}
	lck_mtx_unlock_always(&c_seg->c_lock);

	if (c_buffer) {
		if (pages_populated)
			kernel_memory_depopulate(kernel_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);

		kmem_free(kernel_map, (vm_offset_t) c_buffer, C_SEG_ALLOCSIZE);
	} else if (c_swap_handle)
		vm_swap_free(c_swap_handle);

#if __i386__ || __x86_64__
	lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);
#else /* __i386__ || __x86_64__ */
	lck_spin_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);
#endif /* __i386__ || __x86_64__ */

	for (i = 0; i < C_SEG_SLOT_ARRAYS; i++) {
		if (c_seg->c_slots[i] == 0)
			break;

		kfree((char *)c_seg->c_slots[i], sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE);
	}
	zfree(compressor_segment_zone, c_seg);
}
int	c_seg_trim_page_count = 0;

void
c_seg_trim_tail(c_segment_t c_seg)
{
	c_slot_t	cs;
	uint32_t	c_size;
	uint32_t	c_offset;
	uint32_t	c_rounded_size;
	uint16_t	current_nextslot;
	uint32_t	current_populated_offset;

	if (c_seg->c_bytes_used == 0)
		return;
	current_nextslot = c_seg->c_nextslot;
	current_populated_offset = c_seg->c_populated_offset;

	while (c_seg->c_nextslot) {

		cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1));

		c_size = UNPACK_C_SIZE(cs);

		if (c_size) {
			if (current_nextslot != c_seg->c_nextslot) {
				c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
				c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size);

				c_seg->c_nextoffset = c_offset;
				c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);

				if (c_seg->c_firstemptyslot > c_seg->c_nextslot)
					c_seg->c_firstemptyslot = c_seg->c_nextslot;

				c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) -
							   round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE);
			}
			break;
		}
		c_seg->c_nextslot--;
	}
	assert(c_seg->c_nextslot);
}
int
c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy)
{
	c_slot_mapping_t slot_ptr;
	uint32_t	c_offset = 0;
	uint32_t	old_populated_offset;
	uint32_t	c_rounded_size;
	uint32_t	c_size;
	int		c_indx = 0;
	int		i;
	c_slot_t	c_dst;
	c_slot_t	c_src;
	boolean_t	need_unlock = TRUE;

	assert(c_seg->c_busy);

#if VALIDATE_C_SEGMENTS
	c_seg_validate(c_seg, FALSE);
#endif
	if (c_seg->c_bytes_used == 0) {
		c_seg_free(c_seg);
		return (1);
	}
	if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE)
		goto done;

#if VALIDATE_C_SEGMENTS
	c_seg->c_was_minor_compacted++;
#endif
	c_indx = c_seg->c_firstemptyslot;
	c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

	old_populated_offset = c_seg->c_populated_offset;
	c_offset = c_dst->c_offset;

	for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) {

		c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i);

		c_size = UNPACK_C_SIZE(c_src);

		if (c_size == 0)
			continue;

		memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_size);

#if CHECKSUM_THE_DATA
		c_dst->c_hash_data = c_src->c_hash_data;
#endif
#if CHECKSUM_THE_COMPRESSED_DATA
		c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data;
#endif
		c_dst->c_size = c_src->c_size;
		c_dst->c_packed_ptr = c_src->c_packed_ptr;
		c_dst->c_offset = c_offset;

		slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
		slot_ptr->s_cindx = c_indx;

		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

		c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
		PACK_C_SIZE(c_src, 0);
		c_indx++;

		c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
	}
	c_seg->c_firstemptyslot = c_indx;
	c_seg->c_nextslot = c_indx;
	c_seg->c_nextoffset = c_offset;
	c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
	c_seg->c_bytes_unused = 0;

#if VALIDATE_C_SEGMENTS
	c_seg_validate(c_seg, TRUE);
#endif

	if (old_populated_offset > c_seg->c_populated_offset) {
		uint32_t	gc_size;
		int32_t		*gc_ptr;

		gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset);
		gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset];

		lck_mtx_unlock_always(&c_seg->c_lock);

		kernel_memory_depopulate(kernel_map, (vm_offset_t)gc_ptr, gc_size, KMA_COMPRESSOR);

		if (clear_busy == TRUE)
			lck_mtx_lock_spin_always(&c_seg->c_lock);
		else
			need_unlock = FALSE;
	}
done:
	if (need_unlock == TRUE) {
		if (clear_busy == TRUE)
			C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
	}
	return (0);
}
struct {
	uint64_t asked_permission;
	uint64_t compactions;
	uint64_t moved_slots;
	uint64_t moved_bytes;
	uint64_t wasted_space_in_swapouts;
	uint64_t count_of_swapouts;
} c_seg_major_compact_stats;


#define	C_MAJOR_COMPACTION_AGE_APPROPRIATE	30
#define	C_MAJOR_COMPACTION_OLD_ENOUGH		300
#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE	((C_SEG_BUFSIZE * 80) / 100)
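/*
 * C_MAJOR_COMPACTION_SIZE_APPROPRIATE is 80% of a segment buffer;
 * c_seg_major_compact_ok() declines to merge two segments that are both
 * already at least that full, since little space would be reclaimed.
 */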
boolean_t
c_seg_major_compact_ok(
	c_segment_t c_seg_dst,
	c_segment_t c_seg_src)
{

	c_seg_major_compact_stats.asked_permission++;

	if (c_seg_src->c_filling) {
		/*
		 * we're at or near the head... don't compact
		 */
		return (FALSE);
	}
	if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE &&
	    c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE)
		return (FALSE);

	if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) {
		/*
		 * destination segment is full... can't compact
		 */
		return (FALSE);
	}

	return (TRUE);
}
boolean_t
c_seg_major_compact(
	c_segment_t c_seg_dst,
	c_segment_t c_seg_src)
{
	c_slot_mapping_t slot_ptr;
	uint32_t	c_rounded_size;
	uint32_t	c_size;
	uint16_t	dst_slot;
	int		i;
	int		slotarray;
	c_slot_t	c_dst;
	c_slot_t	c_src;
	boolean_t	keep_compacting = TRUE;

	/*
	 * segments are not locked but they are both marked c_busy
	 * which keeps c_decompress from working on them...
	 * we can safely allocate new pages, move compressed data
	 * from c_seg_src to c_seg_dst and update both c_segment's
	 * state w/o holding the master lock
	 */

#if VALIDATE_C_SEGMENTS
	c_seg_dst->c_was_major_compacted++;
	c_seg_src->c_was_major_donor++;
#endif
	c_seg_major_compact_stats.compactions++;

	dst_slot = c_seg_dst->c_nextslot;

	for (i = 0; i < c_seg_src->c_nextslot; i++) {

		c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i);

		c_size = UNPACK_C_SIZE(c_src);

		if (c_size == 0) {
			/* BATCH: move what we have so far; */
			continue;
		}

		if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) {

			if ((C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) == C_SEG_BUFSIZE)) {
				keep_compacting = FALSE;
				break;
			}
			kernel_memory_populate(kernel_map,
					       (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset],
					       PAGE_SIZE,
					       KMA_COMPRESSOR);

			c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE);
			assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE);
		}

		slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);

		if (c_seg_dst->c_slots[slotarray] == 0) {
			KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0);
			c_seg_dst->c_slots[slotarray] = (struct c_slot *)
				kalloc(sizeof(struct c_slot) *
				       C_SEG_SLOT_ARRAY_SIZE);
			KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0);
		}
		c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);

		memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);

		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

		c_seg_major_compact_stats.moved_slots++;
		c_seg_major_compact_stats.moved_bytes += c_size;

#if CHECKSUM_THE_DATA
		c_dst->c_hash_data = c_src->c_hash_data;
#endif
#if CHECKSUM_THE_COMPRESSED_DATA
		c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data;
#endif
		c_dst->c_size = c_src->c_size;
		c_dst->c_packed_ptr = c_src->c_packed_ptr;
		c_dst->c_offset = c_seg_dst->c_nextoffset;

		if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
			c_seg_dst->c_firstemptyslot++;
		c_seg_dst->c_nextslot++;
		c_seg_dst->c_bytes_used += c_rounded_size;
		c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);

		PACK_C_SIZE(c_src, 0);

		c_seg_src->c_bytes_used -= c_rounded_size;
		c_seg_src->c_bytes_unused += c_rounded_size;
		c_seg_src->c_firstemptyslot = 0;

		if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) {
			/* dest segment is now full */
			keep_compacting = FALSE;
			break;
		}
	}
	if (dst_slot < c_seg_dst->c_nextslot) {

		PAGE_REPLACEMENT_ALLOWED(TRUE);
		/*
		 * we've now locked out c_decompress from
		 * converting the slot passed into it into
		 * a c_segment_t which allows us to use
		 * the backptr to change which c_segment and
		 * index the slot points to
		 */
		while (dst_slot < c_seg_dst->c_nextslot) {

			c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);

			slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
			/* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
			slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
			slot_ptr->s_cindx = dst_slot++;
		}
		PAGE_REPLACEMENT_ALLOWED(FALSE);
	}
	return (keep_compacting);
}
static uint64_t
compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec)
{
	uint64_t end_msecs;
	uint64_t start_msecs;

	end_msecs = (end_sec * 1000) + end_nsec / 1000000;
	start_msecs = (start_sec * 1000) + start_nsec / 1000000;

	return (end_msecs - start_msecs);
}
uint32_t compressor_eval_period_in_msecs = 250;
uint32_t compressor_sample_min_in_msecs = 500;
uint32_t compressor_sample_max_in_msecs = 10000;
uint32_t compressor_thrashing_threshold_per_10msecs = 50;
uint32_t compressor_thrashing_min_per_10msecs = 20;

extern uint32_t vm_page_filecache_min;
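/*
 * Thrashing detection runs off these knobs: compute_swapout_target_age()
 * re-evaluates at most once per compressor_eval_period_in_msecs, only
 * draws conclusions once a sample window has lasted at least
 * compressor_sample_min_in_msecs, and resets the window after
 * compressor_sample_max_in_msecs.  The thrashing test compares the
 * decompression rate against compressor_thrashing_threshold_per_10msecs.
 */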
void
compute_swapout_target_age(void)
{
	clock_sec_t	cur_ts_sec;
	clock_nsec_t	cur_ts_nsec;
	uint32_t	min_operations_needed_in_this_sample;
	uint64_t	elapsed_msecs_in_eval;
	uint64_t	elapsed_msecs_in_sample;
	boolean_t	need_sample_reset = FALSE;
	boolean_t	need_eval_reset = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_sample = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec);

	if (elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) {
		need_sample_reset = TRUE;
		need_eval_reset = TRUE;
		goto done;
	}
	elapsed_msecs_in_eval = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec);

	if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs)
		goto done;
	need_eval_reset = TRUE;

	KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0);

	min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10;

	if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample ||
	    (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) {

		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count,
			     sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0);

		swapout_target_age = 0;

		need_sample_reset = TRUE;
		need_eval_reset = TRUE;
		goto done;
	}
	last_eval_compression_count = sample_period_compression_count;
	last_eval_decompression_count = sample_period_decompression_count;

	if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) {

		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0);
		goto done;
	}
	if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) {

		uint64_t	running_total;
		uint64_t	working_target;
		uint64_t	aging_target;
		uint32_t	oldest_age_of_csegs_sampled = 0;
		uint64_t	working_set_approximation = 0;

		swapout_target_age = 0;

		working_target = (sample_period_decompression_count / 100) * 95;	/* 95 percent */
		aging_target = (sample_period_decompression_count / 100) * 1;		/* 1 percent */

		running_total = 0;

		for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) {

			running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];

			working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];

			if (running_total >= working_target)
				break;
		}
		if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) {

			working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample;

			if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) {

				running_total = overage_decompressions_during_sample_period;

				for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) {
					running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];

					if (running_total >= aging_target)
						break;
				}
				swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled;

				KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0);
			} else
				KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0);
		} else
			KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0);

		need_sample_reset = TRUE;
		need_eval_reset = TRUE;
	} else
		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0);
done:
	if (need_sample_reset == TRUE) {
		bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period));
		overage_decompressions_during_sample_period = 0;

		start_of_sample_period_sec = cur_ts_sec;
		start_of_sample_period_nsec = cur_ts_nsec;
		sample_period_decompression_count = 0;
		sample_period_compression_count = 0;
		last_eval_decompression_count = 0;
		last_eval_compression_count = 0;
	}
	if (need_eval_reset == TRUE) {
		start_of_eval_period_sec = cur_ts_sec;
		start_of_eval_period_nsec = cur_ts_nsec;
	}
}
int		calls_since_last_considered = 0;
int		compaction_swapper_running = 0;
int		compaction_swapper_abort = 0;


#if CONFIG_JETSAM
boolean_t	memorystatus_kill_on_VM_thrashing(boolean_t);
int		compressor_thrashing_induced_jetsam = 0;
boolean_t	vm_compressor_thrashing_detected = FALSE;
#endif /* CONFIG_JETSAM */
static boolean_t
compressor_needs_to_swap(void)
{
	boolean_t	should_swap = FALSE;

	if (vm_swap_up == TRUE) {
		if (COMPRESSOR_NEEDS_TO_SWAP()) {
			return (TRUE);
		}
		if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) {
			return (TRUE);
		}
		if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT))
			return (TRUE);
	}
	compute_swapout_target_age();

	if (swapout_target_age) {
		c_segment_t	c_seg;

		lck_mtx_lock_spin_always(c_list_lock);

		if (!queue_empty(&c_age_list_head)) {

			c_seg = (c_segment_t) queue_first(&c_age_list_head);

			if (c_seg->c_creation_ts <= swapout_target_age)
				should_swap = TRUE;
			else
				swapout_target_age = 0;
		}
		lck_mtx_unlock_always(c_list_lock);
	}
#if CONFIG_JETSAM
	if (vm_swap_up == FALSE) {

		if (should_swap) {

			if (vm_compressor_thrashing_detected == FALSE) {
				vm_compressor_thrashing_detected = TRUE;
				memorystatus_kill_on_VM_thrashing(TRUE /* async */);
				compressor_thrashing_induced_jetsam++;
			}
			/*
			 * let the jetsam take precedence over
			 * any major compactions we might have
			 * been able to do... otherwise we run
			 * the risk of doing major compactions
			 * on segments we're about to free up
			 * due to the jetsam activity.
			 */
			should_swap = FALSE;
		}
	}
#endif /* CONFIG_JETSAM */

	if (COMPRESSOR_NEEDS_TO_MAJOR_COMPACT())
		should_swap = TRUE;

	/*
	 * returning TRUE when swap_supported == FALSE
	 * will cause the major compaction engine to
	 * run, but will not trigger any swapping...
	 * segments that have been major compacted
	 * will be moved to the swapped_out_q
	 * but will not have the c_ondisk flag set
	 */
	return (should_swap);
}
uint64_t
vm_compressor_total_compressions(void)
{
	processor_t	processor = processor_list;
	vm_statistics64_t stat = &PROCESSOR_DATA(processor, vm_stat);

	uint64_t	compressions = stat->compressions;

	if (processor_count > 1) {
		simple_lock(&processor_list_lock);

		while ((processor = processor->processor_list) != NULL) {
			stat = &PROCESSOR_DATA(processor, vm_stat);
			compressions += stat->compressions;
		}
		simple_unlock(&processor_list_lock);
	}
	return compressions;
}
uint32_t vm_wake_compactor_swapper_calls = 0;

void
vm_wake_compactor_swapper(void)
{
	if (compaction_swapper_running)
		return;

	if (c_minor_count == 0)
		return;

	lck_mtx_lock_spin_always(c_list_lock);

	fastwake_warmup = FALSE;

	if (compaction_swapper_running == 0) {
		vm_wake_compactor_swapper_calls++;

		thread_wakeup((event_t)&c_compressor_swap_trigger);

		compaction_swapper_running = 1;
	}
	lck_mtx_unlock_always(c_list_lock);
}
void
vm_consider_waking_compactor_swapper(void)
{
	boolean_t	need_wakeup = FALSE;

	if (calls_since_last_considered++ < 1000 || compaction_swapper_running)
		return;
	calls_since_last_considered = 0;

	if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) {

		need_wakeup = TRUE;

	} else if (compressor_needs_to_swap()) {

		need_wakeup = TRUE;

	} else if (c_minor_count) {
		uint64_t	total_bytes;

		total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64;

		if ((total_bytes - compressor_bytes_used) > total_bytes / 10)
			need_wakeup = TRUE;
	}
	if (need_wakeup == TRUE) {

		lck_mtx_lock_spin_always(c_list_lock);

		fastwake_warmup = FALSE;

		if (compaction_swapper_running == 0) {
			memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE);

			thread_wakeup((event_t)&c_compressor_swap_trigger);

			compaction_swapper_running = 1;
		}
		lck_mtx_unlock_always(c_list_lock);
	}
}
#define	C_SWAPOUT_LIMIT			4
#define	DELAYED_COMPACTIONS_PER_PASS	30
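/*
 * No more than C_SWAPOUT_LIMIT segments are kept queued for swapout at
 * once; a delayed-compaction pass re-checks whether it should yield to
 * swapping after every DELAYED_COMPACTIONS_PER_PASS minor compactions.
 */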
static void
vm_compressor_do_delayed_compactions(boolean_t flush_all)
{
	c_segment_t	c_seg;
	int		number_compacted = 0;
	boolean_t	needs_to_swap = FALSE;


	lck_mtx_assert(c_list_lock, LCK_MTX_ASSERT_OWNED);

	while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) {

		c_seg = (c_segment_t)queue_first(&c_minor_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		c_seg->c_busy = 1;

		c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE);

		if (vm_swap_up == TRUE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) {

			if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT)
				needs_to_swap = TRUE;

			number_compacted = 0;
		}
		lck_mtx_lock_spin_always(c_list_lock);
	}
}
#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT	10

static void
vm_compressor_age_swapped_in_segments(boolean_t flush_all)
{
	c_segment_t	c_seg;
	clock_sec_t	now;
	clock_nsec_t	nsec;

	clock_get_system_nanotime(&now, &nsec);

	while (!queue_empty(&c_swappedin_list_head)) {

		c_seg = (c_segment_t)queue_first(&c_swappedin_list_head);

		if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT)
			break;

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swappedin_q = 0;
		c_swappedin_count--;

		c_seg_insert_into_q(&c_age_list_head, c_seg);
		c_seg->c_on_age_q = 1;
		c_age_count++;

		lck_mtx_unlock_always(&c_seg->c_lock);
	}
}
void
vm_compressor_flush(void)
{
	uint64_t	vm_swap_put_failures_at_start;
	wait_result_t	wait_result = 0;
	AbsoluteTime	startTime, endTime;
	clock_sec_t	now_sec;
	clock_nsec_t	now_nsec;
	uint64_t	nsec;

	HIBLOG("vm_compressor_flush - starting\n");

	clock_get_uptime(&startTime);

	lck_mtx_lock_spin_always(c_list_lock);

	fastwake_warmup = FALSE;
	compaction_swapper_abort = 1;

	while (compaction_swapper_running) {
		assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);

		lck_mtx_unlock_always(c_list_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	compaction_swapper_abort = 0;
	compaction_swapper_running = 1;

	hibernate_flushing = TRUE;
	hibernate_no_swapspace = FALSE;
	c_generation_id_flush_barrier = c_generation_id + 1000;

	clock_get_system_nanotime(&now_sec, &now_nsec);
	hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE;

	vm_swap_put_failures_at_start = vm_swap_put_failures;

	vm_compressor_compact_and_swap(TRUE);

	while (!queue_empty(&c_swapout_list_head)) {

		assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		lck_mtx_unlock_always(c_list_lock);

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock_spin_always(c_list_lock);

		if (wait_result == THREAD_TIMED_OUT)
			break;
	}
	hibernate_flushing = FALSE;
	compaction_swapper_running = 0;

	if (vm_swap_put_failures > vm_swap_put_failures_at_start)
		HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n",
		       vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT);

	lck_mtx_unlock_always(c_list_lock);

	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);

	HIBLOG("vm_compressor_flush completed - took %qd msecs\n", nsec / 1000000ULL);
}
int	compaction_swap_trigger_thread_awakened = 0;

static void
vm_compressor_swap_trigger_thread(void)
{

	lck_mtx_lock_spin_always(c_list_lock);

	compaction_swap_trigger_thread_awakened++;

	vm_compressor_compact_and_swap(FALSE);

	assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT);

	compaction_swapper_running = 0;
	thread_wakeup((event_t)&compaction_swapper_running);

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_compressor_swap_trigger_thread);
}
void
vm_compressor_record_warmup_start(void)
{
	c_segment_t	c_seg;

	lck_mtx_lock_spin_always(c_list_lock);

	if (!queue_empty(&c_age_list_head)) {

		c_seg = (c_segment_t)queue_last(&c_age_list_head);

		first_c_segment_to_warm_generation_id = c_seg->c_generation_id;
	} else
		first_c_segment_to_warm_generation_id = 0;

	fastwake_recording_in_progress = TRUE;

	lck_mtx_unlock_always(c_list_lock);
}


void
vm_compressor_record_warmup_end(void)
{
	c_segment_t	c_seg;

	lck_mtx_lock_spin_always(c_list_lock);

	if (!queue_empty(&c_age_list_head)) {

		c_seg = (c_segment_t)queue_last(&c_age_list_head);

		last_c_segment_to_warm_generation_id = c_seg->c_generation_id;
	} else
		last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id;

	fastwake_recording_in_progress = FALSE;

	lck_mtx_unlock_always(c_list_lock);
}
#define DELAY_TRIM_ON_WAKE_SECS		4
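/*
 * vm_compressor_do_warmup() pushes dont_trim_until_ts this many seconds
 * into the future before kicking off fastwake warmup; the name suggests
 * segment tail-trimming is deferred briefly after a wake.
 */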
void
vm_compressor_do_warmup(void)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	clock_get_system_nanotime(&sec, &nsec);
	dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS;

	if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id)
		return;

	lck_mtx_lock_spin_always(c_list_lock);

	if (compaction_swapper_running == 0) {

		fastwake_warmup = TRUE;
		compaction_swapper_running = 1;
		thread_wakeup((event_t)&c_compressor_swap_trigger);
	}
	lck_mtx_unlock_always(c_list_lock);
}
void
do_fastwake_warmup(void)
{
	uint64_t	my_thread_id;
	c_segment_t	c_seg = NULL;

	lck_mtx_unlock_always(c_list_lock);

	my_thread_id = current_thread()->thread_id;
	proc_set_task_policy_thread(kernel_task, my_thread_id,
				    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);

	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_list_head) && fastwake_warmup == TRUE) {

		c_seg = (c_segment_t) queue_first(&c_swappedout_list_head);

		if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id ||
		    c_seg->c_generation_id > last_c_segment_to_warm_generation_id)
			break;

		lck_mtx_lock_spin_always(&c_seg->c_lock);
		lck_mtx_unlock_always(c_list_lock);

		if (c_seg->c_busy)
			c_seg_wait_on_busy(c_seg);
		else {
			c_seg_swapin(c_seg, TRUE);

			lck_mtx_unlock_always(&c_seg->c_lock);

			c_segment_warmup_count++;
			vm_pageout_io_throttle();
		}
		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);

	proc_set_task_policy_thread(kernel_task, my_thread_id,
				    TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0);

	lck_mtx_lock_spin_always(c_list_lock);
}
static void
vm_compressor_compact_and_swap(boolean_t flush_all)
{
	c_segment_t	c_seg, c_seg_next;
	boolean_t	keep_compacting;
	clock_sec_t	sec;
	clock_nsec_t	nsec;


	if (fastwake_warmup == TRUE) {
		uint64_t	starting_warmup_count;

		starting_warmup_count = c_segment_warmup_count;

		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count,
				      first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0);
		do_fastwake_warmup();
		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0);

		fastwake_warmup = FALSE;
	}

	while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) {

		if (hibernate_flushing == TRUE) {

			if (hibernate_should_abort()) {
				HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n");
				break;
			}
			if (hibernate_no_swapspace == TRUE) {
				HIBLOG("vm_compressor_flush - out of swap space\n");
				break;
			}
			clock_get_system_nanotime(&sec, &nsec);

			if (sec > hibernate_flushing_deadline) {
				HIBLOG("vm_compressor_flush - failed to finish before deadline\n");
				break;
			}
		}
		if (c_swapout_count >= C_SWAPOUT_LIMIT) {

			assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000*NSEC_PER_USEC);

			lck_mtx_unlock_always(c_list_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock_spin_always(c_list_lock);
		}
		vm_compressor_do_delayed_compactions(flush_all);

		vm_compressor_age_swapped_in_segments(flush_all);

		if (c_swapout_count >= C_SWAPOUT_LIMIT) {
			/*
			 * we timed out on the above thread_block
			 * let's loop around and try again
			 * the timeout allows us to continue
			 * to do minor compactions to make
			 * more memory available
			 */
			continue;
		}

		/*
		 * Swap out segments?
		 */
		if (flush_all == FALSE) {
			boolean_t	needs_to_swap;

			lck_mtx_unlock_always(c_list_lock);

			needs_to_swap = compressor_needs_to_swap();

			lck_mtx_lock_spin_always(c_list_lock);

			if (needs_to_swap == FALSE)
				break;
		}
		if (queue_empty(&c_age_list_head))
			break;
		c_seg = (c_segment_t) queue_first(&c_age_list_head);

		if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier)
			break;

		if (c_seg->c_filling) {
			/*
			 * we're at or near the head... no more work to do
			 */
			break;
		}
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (c_seg->c_busy) {

			lck_mtx_unlock_always(c_list_lock);
			c_seg_wait_on_busy(c_seg);
			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		c_seg->c_busy = 1;

		if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) {
			/*
			 * found an empty c_segment and freed it
			 * so go grab the next guy in the queue
			 */
			continue;
		}
		keep_compacting = TRUE;

		while (keep_compacting == TRUE) {

			assert(c_seg->c_busy);

			/* look for another segment to consolidate */

			c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);

			if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next))
				break;

			if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE)
				break;

			lck_mtx_lock_spin_always(&c_seg_next->c_lock);

			if (c_seg_next->c_busy) {

				lck_mtx_unlock_always(c_list_lock);
				c_seg_wait_on_busy(c_seg_next);
				lck_mtx_lock_spin_always(c_list_lock);

				continue;
			}
			/* grab that segment */
			c_seg_next->c_busy = 1;

			if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) {
				/*
				 * found an empty c_segment and freed it
				 * so we can't continue to use c_seg_next
				 */
				continue;
			}

			/* unlock the list ... */
			lck_mtx_unlock_always(c_list_lock);

			/* do the major compaction */

			keep_compacting = c_seg_major_compact(c_seg, c_seg_next);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(&c_seg_next->c_lock);
			/*
			 * run a minor compaction on the donor segment
			 * since we pulled at least some of its
			 * data into our target... if we've emptied
			 * it, now is a good time to free it which
			 * c_seg_minor_compaction_and_unlock also takes care of
			 *
			 * by passing TRUE, we ask for c_busy to be cleared
			 * and c_wanted to be taken care of
			 */
			c_seg_minor_compaction_and_unlock(c_seg_next, TRUE);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);

			/* relock the list */
			lck_mtx_lock_spin_always(c_list_lock);

		} /* major compaction */

		c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used;
		c_seg_major_compact_stats.count_of_swapouts++;

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_busy);
		assert(c_seg->c_on_age_q);
		assert(!c_seg->c_on_minorcompact_q);

		queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_age_q = 0;
		c_age_count--;

		if (vm_swap_up == TRUE) {
			queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
			c_seg->c_on_swapout_q = 1;
			c_swapout_count++;
		} else {
			queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
			c_seg->c_on_swappedout_q = 1;
			c_swappedout_count++;
		}
		C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);

		if (c_swapout_count) {
			lck_mtx_unlock_always(c_list_lock);

			thread_wakeup((event_t)&c_swapout_list_head);

			lck_mtx_lock_spin_always(c_list_lock);
		}
	}
}
static c_segment_t
c_seg_allocate(c_segment_t *current_chead)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;
	c_segment_t	c_seg;
	int		slotarray;
	int		c_segno;

	if ( (c_seg = *current_chead) == NULL ) {

		KERNEL_DEBUG(0xe0400004 | DBG_FUNC_START, 0, 0, 0, 0, 0);

		lck_mtx_lock_spin_always(c_list_lock);

		while (c_segments_busy == TRUE) {
			assert_wait((event_t) (&c_segments_busy), THREAD_UNINT);

			lck_mtx_unlock_always(c_list_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock_spin_always(c_list_lock);
		}
		if (c_free_segno_head == (uint32_t)-1) {

			if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) {
				lck_mtx_unlock_always(c_list_lock);

				KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 1, 0);
				return (NULL);
			}
			c_segments_busy = TRUE;
			lck_mtx_unlock_always(c_list_lock);

			kernel_memory_populate(kernel_map, (vm_offset_t)c_segments_next_page, PAGE_SIZE, KMA_KOBJECT);
			c_segments_next_page += PAGE_SIZE;

			for (c_segno = c_segments_available + 1; c_segno < (c_segments_available + C_SEGMENTS_PER_PAGE); c_segno++)
				c_segments[c_segno - 1].c_segno = c_segno;

			lck_mtx_lock_spin_always(c_list_lock);

			c_segments[c_segno - 1].c_segno = c_free_segno_head;
			c_free_segno_head = c_segments_available;
			c_segments_available += C_SEGMENTS_PER_PAGE;

			c_segments_busy = FALSE;
			thread_wakeup((event_t) (&c_segments_busy));
		}
		c_segno = c_free_segno_head;
		c_free_segno_head = c_segments[c_segno].c_segno;

		lck_mtx_unlock_always(c_list_lock);

		c_seg = (c_segment_t)zalloc(compressor_segment_zone);
		bzero((char *)c_seg, sizeof(struct c_segment));

		if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_seg->c_store.c_buffer), C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) {
			zfree(compressor_segment_zone, c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			c_segments[c_segno].c_segno = c_free_segno_head;
			c_free_segno_head = c_segno;

			lck_mtx_unlock_always(c_list_lock);

			KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 2, 0);

			return (NULL);
		}

#if __i386__ || __x86_64__
		lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
#else /* __i386__ || __x86_64__ */
		lck_spin_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
#endif /* __i386__ || __x86_64__ */

		kernel_memory_populate(kernel_map, (vm_offset_t)(c_seg->c_store.c_buffer), 3 * PAGE_SIZE, KMA_COMPRESSOR);

		c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(3 * PAGE_SIZE);
		c_seg->c_firstemptyslot = C_SLOT_MAX;
		c_seg->c_mysegno = c_segno;
		c_seg->c_filling = 1;

		lck_mtx_lock_spin_always(c_list_lock);

		c_segment_count++;
		c_segments[c_segno].c_seg = c_seg;

		c_seg->c_generation_id = c_generation_id++;

		queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_age_q = 1;
		c_age_count++;

		lck_mtx_unlock_always(c_list_lock);

		clock_get_system_nanotime(&sec, &nsec);
		c_seg->c_creation_ts = (uint32_t)sec;

		*current_chead = c_seg;

		KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, c_seg, 0, 0, 3, 0);
	}
	slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg, c_seg->c_nextslot);

	if (c_seg->c_slots[slotarray] == 0) {
		KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0);

		c_seg->c_slots[slotarray] = (struct c_slot *)kalloc(sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE);

		KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(&c_seg->c_lock);

	return (c_seg);
}
void
c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
{
	uint32_t	unused_bytes;
	uint32_t	offset_to_depopulate;

	unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset));

	if (unused_bytes) {

		offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset)));

		/*
		 * release the extra physical page(s) at the end of the segment
		 */
		lck_mtx_unlock_always(&c_seg->c_lock);

		kernel_memory_depopulate(
			kernel_map,
			(vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate],
			unused_bytes,
			KMA_COMPRESSOR);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		c_seg->c_populated_offset = offset_to_depopulate;
	}
	c_seg->c_filling = 0;

	if (C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
		c_seg_need_delayed_compaction(c_seg);

	lck_mtx_unlock_always(&c_seg->c_lock);

	*current_chead = NULL;
}
/*
 * returns with c_seg locked
 */
void
c_seg_swapin_requeue(c_segment_t c_seg)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	clock_get_system_nanotime(&sec, &nsec);

	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	if (c_seg->c_on_swappedout_q) {
		queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swappedout_q = 0;
		c_swappedout_count--;
	} else {
		assert(c_seg->c_on_swappedout_sparse_q);

		queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swappedout_sparse_q = 0;
		c_swappedout_sparse_count--;
	}
	if (c_seg->c_store.c_buffer) {
		queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_swappedin_q = 1;
		c_swappedin_count++;
	}
#if TRACK_BAD_C_SEGMENTS
	else {
		queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		c_seg->c_on_bad_q = 1;
		c_bad_count++;
	}
#endif
	c_seg->c_swappedin_ts = (uint32_t)sec;
	c_seg->c_ondisk = 0;
	c_seg->c_was_swapped_in = 1;

	lck_mtx_unlock_always(c_list_lock);
}
/*
 * c_seg has to be locked and is returned locked.
 * PAGE_REPLACEMENT_DISALLOWED has to be TRUE on entry and is returned TRUE
 */
void
c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction)
{
	vm_offset_t	addr = 0;
	uint32_t	io_size = 0;
	uint64_t	f_offset;

#if !CHECKSUM_THE_SWAP
	if (c_seg->c_ondisk)
		c_seg_trim_tail(c_seg);
#endif
	io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
	f_offset = c_seg->c_store.c_swap_handle;

	c_seg->c_busy = 1;
	lck_mtx_unlock_always(&c_seg->c_lock);

	if (c_seg->c_ondisk) {

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		if (kernel_memory_allocate(kernel_map, &addr, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS)
			panic("c_seg_swapin: kernel_memory_allocate failed\n");

		kernel_memory_populate(kernel_map, addr, io_size, KMA_COMPRESSOR);

		if (vm_swap_get(addr, f_offset, io_size) != KERN_SUCCESS) {
			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			kernel_memory_depopulate(kernel_map, addr, io_size, KMA_COMPRESSOR);
			kmem_free(kernel_map, addr, C_SEG_ALLOCSIZE);

			c_seg->c_store.c_buffer = (int32_t*) NULL;
		} else {
			c_seg->c_store.c_buffer = (int32_t*) addr;

			vm_swap_decrypt(c_seg);

#if CHECKSUM_THE_SWAP
			if (c_seg->cseg_swap_size != io_size)
				panic("swapin size doesn't match swapout size");

			if (c_seg->cseg_hash != hash_string((char*) c_seg->c_store.c_buffer, (int)io_size)) {
				panic("c_seg_swapin - Swap hash mismatch\n");
			}
#endif /* CHECKSUM_THE_SWAP */

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			if (force_minor_compaction == TRUE) {
				lck_mtx_lock_spin_always(&c_seg->c_lock);

				c_seg_minor_compaction_and_unlock(c_seg, FALSE);
			}
			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
		}
	}
	c_seg_swapin_requeue(c_seg);

	C_SEG_WAKEUP_DONE(c_seg);
}
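/*
 * Compress one page from 'src' into the currently filling segment.
 * c_seg_allocate() returns with the segment lock held and
 * PAGE_REPLACEMENT_DISALLOWED(TRUE); more of the segment buffer is
 * populated if needed, WKdm is run, and the page is stored uncompressed
 * when it doesn't compress below PAGE_SIZE. The slot mapping handed back
 * to the caller records <segment number + 1, slot index>.
 */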
static int
c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf)
{
	int		c_size;
	int		c_rounded_size;
	int		max_csize;
	struct c_slot	*cs;
	c_segment_t	c_seg;

	KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0);
retry:
	if ((c_seg = c_seg_allocate(current_chead)) == NULL)
		return (1);
	/*
	 * returns with c_seg lock held
	 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)
	 */
	cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot);

	cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
	cs->c_offset = c_seg->c_nextoffset;

	max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset);

	if (max_csize > PAGE_SIZE)
		max_csize = PAGE_SIZE;

	if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset -
				  c_seg->c_nextoffset)
	    < (unsigned) max_csize + PAGE_SIZE &&
	    (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)
	     < C_SEG_ALLOCSIZE)) {
		lck_mtx_unlock_always(&c_seg->c_lock);

		kernel_memory_populate(kernel_map,
				       (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
				       PAGE_SIZE,
				       KMA_COMPRESSOR);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE);
	}

#if CHECKSUM_THE_DATA
	cs->c_hash_data = hash_string(src, PAGE_SIZE);
#endif
	c_size = WKdm_compress_new((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
				   (WK_word *)(uintptr_t)scratch_buf, max_csize - 4);

	assert(c_size <= (max_csize - 4) && c_size >= -1);

	if (c_size == -1) {

		if (max_csize < PAGE_SIZE) {
			c_current_seg_filled(c_seg, current_chead);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);

			goto retry;
		}
		c_size = PAGE_SIZE;

		memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
	}
#if CHECKSUM_THE_COMPRESSED_DATA
	cs->c_hash_compressed_data = hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
#endif
	c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

	PACK_C_SIZE(cs, c_size);
	c_seg->c_bytes_used += c_rounded_size;
	c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);

	slot_ptr->s_cindx = c_seg->c_nextslot++;
	/* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
	slot_ptr->s_cseg = c_seg->c_mysegno + 1;

	if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX)
		c_current_seg_filled(c_seg, current_chead);
	else
		lck_mtx_unlock_always(&c_seg->c_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);

	OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
	OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes);
	OSAddAtomic64(c_size, &c_segment_compressed_bytes);

	OSAddAtomic(1, &c_segment_pages_compressed);
	OSAddAtomic(1, &sample_period_compression_count);

	KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0);

	if (vm_compressor_low_on_space()) {
		ipc_port_t	trigger = IP_NULL;

		PSL_LOCK();
		if (IP_VALID(min_pages_trigger_port)) {
			trigger = min_pages_trigger_port;
			min_pages_trigger_port = IP_NULL;
		}
		PSL_UNLOCK();

		if (IP_VALID(trigger)) {
			no_paging_space_action();
			default_pager_space_alert(trigger, HI_WAT_ALERT);
			ipc_port_release_send(trigger);
		}
	}
	return (0);
}
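/*
 * Decompress (or, when dst == NULL, simply free) the page described by
 * 'slot_ptr'. The owning segment is swapped back in if it sits on one of
 * the swapped-out queues, then the page is either copied out directly
 * (stored uncompressed) or decompressed with WKdm using the per-cpu
 * scratch buffer. Unless C_KEEP is set, the slot is released and the
 * segment is considered for freeing, minor compaction or the sparse list.
 */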
static int
c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot)
{
	struct c_slot	*cs;
	c_segment_t	c_seg;
	int		c_indx;
	int		c_size;
	int		c_rounded_size;
	int		retval = 0;
	boolean_t	c_seg_has_data = TRUE;
	boolean_t	c_seg_swappedin = FALSE;
	boolean_t	need_unlock = TRUE;
	boolean_t	consider_defragmenting = FALSE;

ReTry:
	if (lck_rw_try_lock_shared(&c_decompressor_lock) == 0) {
		if (flags & C_DONT_BLOCK) {
			*zeroslot = 0;
			return (-2);
		}
		lck_rw_lock_shared(&c_decompressor_lock);
	}
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	/* s_cseg is actually "segno+1" */
	c_seg = c_segments[slot_ptr->s_cseg - 1].c_seg;

	lck_mtx_lock_spin_always(&c_seg->c_lock);

	if (flags & C_DONT_BLOCK) {
		if (c_seg->c_busy || c_seg->c_ondisk) {

			retval = -2;
			*zeroslot = 0;

			goto done;
		}
	}
	if (c_seg->c_busy) {

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		lck_rw_done(&c_decompressor_lock);

		c_seg_wait_on_busy(c_seg);

		goto ReTry;
	}
	c_indx = slot_ptr->s_cindx;

	cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

	c_size = UNPACK_C_SIZE(cs);

	c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

	if (dst) {
		uint32_t	age_of_cseg;
		clock_sec_t	cur_ts_sec;
		clock_nsec_t	cur_ts_nsec;

		if (c_seg->c_on_swappedout_q || c_seg->c_on_swappedout_sparse_q) {
			if (c_seg->c_ondisk)
				c_seg_swappedin = TRUE;
			c_seg_swapin(c_seg, FALSE);
		}
		if (c_seg->c_store.c_buffer == NULL) {
			c_seg_has_data = FALSE;
			goto c_seg_invalid_data;
		}
#if CHECKSUM_THE_COMPRESSED_DATA
		if (cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))
			panic("compressed data doesn't match original");
#endif
		if (c_rounded_size == PAGE_SIZE) {
			/*
			 * page wasn't compressible... just copy it out
			 */
			memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE);
		} else {
			uint32_t	my_cpu_no;
			char		*scratch_buf;

			/*
			 * we're behind the c_seg lock held in spin mode
			 * which means pre-emption is disabled... therefore
			 * the following sequence is atomic and safe
			 */
			my_cpu_no = cpu_number();

			assert(my_cpu_no < compressor_cpus);

			scratch_buf = &compressor_scratch_bufs[my_cpu_no * WKdm_SCRATCH_BUF_SIZE];

			WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
					    (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
		}
#if CHECKSUM_THE_DATA
		if (cs->c_hash_data != hash_string(dst, PAGE_SIZE))
			panic("decompressed data doesn't match original");
#endif
		if (!c_seg->c_was_swapped_in) {

			clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

			age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts;

			if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE)
				OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]);
			else
				OSAddAtomic(1, &overage_decompressions_during_sample_period);

			OSAddAtomic(1, &sample_period_decompression_count);
		}
	} else {
		if (c_seg->c_store.c_buffer == NULL)
			c_seg_has_data = FALSE;
	}
c_seg_invalid_data:

	if (c_seg_has_data == TRUE) {
		if (c_seg_swappedin == TRUE)
			retval = 1;
	} else
		retval = -1;

	if (flags & C_KEEP) {
		*zeroslot = 0;
		goto done;
	}
	c_seg->c_bytes_unused += c_rounded_size;
	c_seg->c_bytes_used -= c_rounded_size;
	PACK_C_SIZE(cs, 0);

	if (c_indx < c_seg->c_firstemptyslot)
		c_seg->c_firstemptyslot = c_indx;

	OSAddAtomic(-1, &c_segment_pages_compressed);

	if (c_seg_has_data == TRUE && !c_seg->c_ondisk) {
		/*
		 * c_ondisk == TRUE can occur when we're doing a
		 * free of a compressed page (i.e. dst == NULL)
		 */
		OSAddAtomic64(-c_rounded_size, &compressor_bytes_used);
	}
	if (!c_seg->c_filling) {
		if (c_seg->c_bytes_used == 0) {
			if (c_seg->c_on_minorcompact_q || c_seg->c_on_swappedout_sparse_q) {
				if (c_seg_try_free(c_seg) == TRUE)
					need_unlock = FALSE;
			} else {
				c_seg_free(c_seg);
				need_unlock = FALSE;
			}
		} else if (c_seg->c_on_minorcompact_q) {

			if (C_SEG_INCORE_IS_SPARSE(c_seg)) {
				c_seg_try_minor_compaction_and_unlock(c_seg);
				need_unlock = FALSE;
			}
		} else if (!c_seg->c_ondisk) {

			if (c_seg_has_data == TRUE && !c_seg->c_on_swapout_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
				c_seg_need_delayed_compaction(c_seg);
			}
		} else if (!c_seg->c_on_swappedout_sparse_q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {

			c_seg_move_to_sparse_list(c_seg);
			consider_defragmenting = TRUE;
		}
	}
done:
	if (need_unlock == TRUE)
		lck_mtx_unlock_always(&c_seg->c_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);

	if (consider_defragmenting == TRUE)
		vm_swap_consider_defragmenting();

	lck_rw_done(&c_decompressor_lock);

	return (retval);
}
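/*
 * Decompress the page held in '*slot' into the physical page 'pn',
 * addressed through its physmap alias. The return values are described
 * in the comment at the bottom of this routine.
 */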
int
vm_compressor_get(ppnum_t pn, int *slot, int flags)
{
	char	*dst;
	int	zeroslot = 1;
	int	retval;

#if __x86_64__
	dst = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
#else
#error "unsupported architecture"
#endif

	retval = c_decompress_page(dst, (c_slot_mapping_t)slot, flags, &zeroslot);

	/*
	 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP)
	 * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set
	 */
	if (zeroslot) {
		/*
		 * We've just decompressed a page, and are about to hand that back to VM for
		 * re-entry into some pmap. This is a decompression operation which must have no
		 * impact on the pmap's physical footprint. However, when VM goes to re-enter
		 * this page into the pmap, it doesn't know that it came from the compressor,
		 * which means the pmap's physical footprint will be incremented. To compensate
		 * for that, we decrement the physical footprint here, so that the total net effect
		 * on the physical footprint statistic is zero.
		 */
		pmap_ledger_debit(current_task()->map->pmap, task_ledgers.phys_footprint, PAGE_SIZE);

		*slot = 0;
	}
	/*
	 * returns 0 if we successfully decompressed a page from a segment already in memory
	 * returns 1 if we had to first swap in the segment, before successfully decompressing the page
	 * returns -1 if we encountered an error swapping in the segment - decompression failed
	 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set
	 */
	return (retval);
}
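/*
 * Release the compressed copy of a page without decompressing it
 * (dst == NULL tells c_decompress_page to just free the slot).
 */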
void
vm_compressor_free(int *slot)
{
	int	zeroslot = 1;

	(void)c_decompress_page(NULL, (c_slot_mapping_t)slot, 0, &zeroslot);

	*slot = 0;
}
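/*
 * Compress the page at physical page number 'pn' and record where it
 * landed in '*slot'. 'current_chead' carries the caller's currently
 * filling segment across calls; 'scratch_buf' is the WKdm scratch area.
 */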
int
vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf)
{
	char	*src;
	int	retval;

	if ((vm_offset_t)slot < VM_MIN_KERNEL_AND_KEXT_ADDRESS || (vm_offset_t)slot >= VM_MAX_KERNEL_ADDRESS)
		panic("vm_compressor_put: slot 0x%llx address out of range [0x%llx:0x%llx]",
		      (uint64_t)(vm_offset_t) slot,
		      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
		      (uint64_t) VM_MAX_KERNEL_ADDRESS);

#if __x86_64__
	src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
#else
#error "unsupported architecture"
#endif
	retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf);