/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_compressor_algorithms.h>
#include <vm/vm_fault.h>
#include <mach/mach_host.h>		/* for host_info() */
#include <kern/ledger.h>
#include <kern/policy_internal.h>

#include <i386/misc_protos.h>

#include <IOKit/IOHibernatePrivate.h>

/*
 * vm_compressor_mode has a hierarchy of control to set its value.
 * boot-args are checked first, then device-tree, and finally
 * the default value that is defined below. See vm_fault_init() for
 * the boot-arg & device-tree code.
 */

int		vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP;
int		vm_scale = 16;


int		vm_compressor_is_active = 0;
int		vm_compression_limit = 0;
int		vm_compressor_available = 0;

extern void	vm_pageout_io_throttle(void);

#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA
extern unsigned int hash_string(char *cp, int len);
#endif

#define UNPACK_C_SIZE(cs)	((cs->c_size == (PAGE_SIZE-1)) ? PAGE_SIZE : cs->c_size)
#define PACK_C_SIZE(cs, size)	(cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size))
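/*
 * Note: a full page is encoded as PAGE_SIZE - 1 by PACK_C_SIZE and mapped
 * back to PAGE_SIZE by UNPACK_C_SIZE, presumably because the c_size
 * bit-field is too narrow to hold the value PAGE_SIZE itself; an
 * incompressible page stored verbatim therefore round-trips correctly
 * through these two macros.
 */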


struct c_sv_hash_entry {
	union {
		struct {
			uint32_t	c_sv_he_ref;
			uint32_t	c_sv_he_data;
		} c_sv_he;
		uint64_t	c_sv_he_record;

	} c_sv_he_un;
};

#define	he_ref	c_sv_he_un.c_sv_he.c_sv_he_ref
#define	he_data	c_sv_he_un.c_sv_he.c_sv_he_data
#define	he_record	c_sv_he_un.c_sv_he_record

#define	C_SV_HASH_MAX_MISS	32
#define	C_SV_HASH_SIZE		((1 << 10))
#define	C_SV_HASH_MASK		((1 << 10) - 1)
#define	C_SV_CSEG_ID		((1 << 22) - 1)


struct c_slot_mapping {
	uint32_t	s_cseg:22, 	/* segment number + 1 */
			s_cindx:10;	/* index in the segment */
};
#define	C_SLOT_MAX_INDEX	(1 << 10)
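/*
 * A c_slot_mapping encodes where a compressed page lives: s_cseg holds the
 * owning segment number plus one (so an all-zero mapping can mean "empty
 * slot") and s_cindx the slot index within that segment.  C_SV_CSEG_ID is
 * the largest value the 22-bit s_cseg field can hold and appears to be
 * reserved as a marker for pages handled by the single-value path
 * (c_segment_sv_hash_table) rather than stored in a real segment.
 */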

typedef struct c_slot_mapping *c_slot_mapping_t;


union c_segu {
	c_segment_t	c_seg;
	uintptr_t	c_segno;
};



#define C_SLOT_PACK_PTR(ptr)		(((uintptr_t)ptr - (uintptr_t) KERNEL_PMAP_HEAP_RANGE_START) >> 2)
#define C_SLOT_UNPACK_PTR(cslot)	((uintptr_t)(cslot->c_packed_ptr << 2) + (uintptr_t) KERNEL_PMAP_HEAP_RANGE_START)
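/*
 * The packed form of a slot-mapping pointer is its offset from
 * KERNEL_PMAP_HEAP_RANGE_START shifted right by 2, so the low two bits of
 * the pointer must be zero (4-byte alignment) for packing and unpacking to
 * round-trip:
 *
 *	cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
 *	assert(C_SLOT_UNPACK_PTR(cs) == (uintptr_t)slot_ptr);
 *
 * vm_compressor_init() performs exactly this sanity check against the
 * zone map bounds before the compressor is brought up.
 */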


uint32_t	c_segment_count = 0;
uint32_t	c_segment_count_max = 0;

uint64_t	c_generation_id = 0;
uint64_t	c_generation_id_flush_barrier;


#define		HIBERNATE_FLUSHING_SECS_TO_COMPLETE	120

boolean_t	hibernate_no_swapspace = FALSE;
clock_sec_t	hibernate_flushing_deadline = 0;


#if RECORD_THE_COMPRESSED_DATA
char	*c_compressed_record_sbuf;
char	*c_compressed_record_ebuf;
char	*c_compressed_record_cptr;
#endif


queue_head_t	c_age_list_head;
queue_head_t	c_swapout_list_head;
queue_head_t	c_swappedin_list_head;
queue_head_t	c_swappedout_list_head;
queue_head_t	c_swappedout_sparse_list_head;
queue_head_t	c_major_list_head;
queue_head_t	c_filling_list_head;
queue_head_t	c_bad_list_head;

uint32_t	c_age_count = 0;
uint32_t	c_swapout_count = 0;
uint32_t	c_swappedin_count = 0;
uint32_t	c_swappedout_count = 0;
uint32_t	c_swappedout_sparse_count = 0;
uint32_t	c_major_count = 0;
uint32_t	c_filling_count = 0;
uint32_t	c_empty_count = 0;
uint32_t	c_bad_count = 0;


queue_head_t	c_minor_list_head;
uint32_t	c_minor_count = 0;

int		c_overage_swapped_count = 0;
int		c_overage_swapped_limit = 0;

int		c_seg_fixed_array_len;
union  c_segu	*c_segments;
vm_offset_t	c_buffers;
vm_size_t	c_buffers_size;
caddr_t		c_segments_next_page;
boolean_t	c_segments_busy;
uint32_t	c_segments_available;
uint32_t	c_segments_limit;
uint32_t	c_segments_nearing_limit;

uint32_t	c_segment_svp_in_hash;
uint32_t	c_segment_svp_hash_succeeded;
uint32_t	c_segment_svp_hash_failed;
uint32_t	c_segment_svp_zero_compressions;
uint32_t	c_segment_svp_nonzero_compressions;
uint32_t	c_segment_svp_zero_decompressions;
uint32_t	c_segment_svp_nonzero_decompressions;

uint32_t	c_segment_noncompressible_pages;

uint32_t	c_segment_pages_compressed;
uint32_t	c_segment_pages_compressed_limit;
uint32_t	c_segment_pages_compressed_nearing_limit;
uint32_t	c_free_segno_head = (uint32_t)-1;

uint32_t	vm_compressor_minorcompact_threshold_divisor = 10;
uint32_t	vm_compressor_majorcompact_threshold_divisor = 10;
uint32_t	vm_compressor_unthrottle_threshold_divisor = 10;
uint32_t	vm_compressor_catchup_threshold_divisor = 10;

#define		C_SEGMENTS_PER_PAGE	(PAGE_SIZE / sizeof(union c_segu))


lck_grp_attr_t	vm_compressor_lck_grp_attr;
lck_attr_t	vm_compressor_lck_attr;
lck_grp_t	vm_compressor_lck_grp;
lck_mtx_t	*c_list_lock;
lck_rw_t	c_master_lock;
boolean_t	decompressions_blocked = FALSE;

zone_t		compressor_segment_zone;
int		c_compressor_swap_trigger = 0;

uint32_t	compressor_cpus;
char		*compressor_scratch_bufs;
char		*kdp_compressor_scratch_buf;
char		*kdp_compressor_decompressed_page;
addr64_t	kdp_compressor_decompressed_page_paddr;
ppnum_t		kdp_compressor_decompressed_page_ppnum;

clock_sec_t	start_of_sample_period_sec = 0;
clock_nsec_t	start_of_sample_period_nsec = 0;
clock_sec_t	start_of_eval_period_sec = 0;
clock_nsec_t	start_of_eval_period_nsec = 0;
uint32_t	sample_period_decompression_count = 0;
uint32_t	sample_period_compression_count = 0;
uint32_t	last_eval_decompression_count = 0;
uint32_t	last_eval_compression_count = 0;

#define		DECOMPRESSION_SAMPLE_MAX_AGE	(60 * 30)

boolean_t	vm_swapout_ripe_segments = FALSE;
uint32_t	vm_ripe_target_age = (60 * 60 * 48);

uint32_t	swapout_target_age = 0;
uint32_t	age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE];
uint32_t	overage_decompressions_during_sample_period = 0;

void		do_fastwake_warmup(queue_head_t *, boolean_t);
boolean_t	fastwake_warmup = FALSE;
boolean_t	fastwake_recording_in_progress = FALSE;
clock_sec_t	dont_trim_until_ts = 0;

uint64_t	c_segment_warmup_count;
uint64_t	first_c_segment_to_warm_generation_id = 0;
uint64_t	last_c_segment_to_warm_generation_id = 0;
boolean_t	hibernate_flushing = FALSE;

int64_t		c_segment_input_bytes __attribute__((aligned(8))) = 0;
int64_t		c_segment_compressed_bytes __attribute__((aligned(8))) = 0;
int64_t		compressor_bytes_used __attribute__((aligned(8))) = 0;


struct c_sv_hash_entry c_segment_sv_hash_table[C_SV_HASH_SIZE]  __attribute__ ((aligned (8)));


static boolean_t compressor_needs_to_swap(void);
static void vm_compressor_swap_trigger_thread(void);
static void vm_compressor_do_delayed_compactions(boolean_t);
static void vm_compressor_compact_and_swap(boolean_t);
static void vm_compressor_age_swapped_in_segments(boolean_t);

static void vm_compressor_take_paging_space_action(void);

boolean_t vm_compressor_low_on_space(void);

void compute_swapout_target_age(void);

boolean_t c_seg_major_compact(c_segment_t, c_segment_t);
boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t);

int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t);
int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t);
void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg);

void c_seg_move_to_sparse_list(c_segment_t);
void c_seg_insert_into_q(queue_head_t *, c_segment_t);

uint64_t vm_available_memory(void);
uint64_t vm_compressor_pages_compressed(void);

/*
 * indicate the need to do a major compaction if
 * the overall set of in-use compression segments
 * becomes sparse... on systems that support pressure
 * driven swapping, this will also cause swapouts to
 * be initiated.
 */
static inline boolean_t	vm_compressor_needs_to_major_compact()
{
	uint32_t	incore_seg_count;

	incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;

	if ((c_segment_count >= (c_segments_nearing_limit / 8)) &&
	    ((incore_seg_count * C_SEG_MAX_PAGES) - VM_PAGE_COMPRESSOR_COUNT) >
	    ((incore_seg_count / 8) * C_SEG_MAX_PAGES))
		return (1);
	return (0);
}
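/*
 * In other words: once at least 1/8 of the segment "nearing" limit is in
 * use, a major compaction is requested when the capacity of the in-core
 * segments (incore_seg_count * C_SEG_MAX_PAGES) exceeds the pages actually
 * resident in the compressor (VM_PAGE_COMPRESSOR_COUNT) by more than 1/8 of
 * that capacity; i.e. the in-core population has become sparse.
 */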


uint64_t
vm_available_memory(void)
{
	return (((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64);
}


uint64_t
vm_compressor_pages_compressed(void)
{
	return (c_segment_pages_compressed * PAGE_SIZE_64);
}


boolean_t
vm_compressor_low_on_space(void)
{
	if ((c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) ||
	    (c_segment_count > c_segments_nearing_limit))
		return (TRUE);

	return (FALSE);
}


int
vm_wants_task_throttled(task_t task)
{
	if (task == kernel_task)
		return (0);

	if (VM_CONFIG_SWAP_IS_ACTIVE) {
		if ((vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) &&
		    (unsigned int)pmap_compressed(task->map->pmap) > (c_segment_pages_compressed / 4))
			return (1);
	}
	return (0);
}
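/*
 * A task is considered for throttling only when swap is active, the
 * compressor is low on space (or the hard throttle limit has been reached),
 * and that task alone accounts for more than a quarter of all compressed
 * pages.
 */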


#if DEVELOPMENT || DEBUG
boolean_t kill_on_no_paging_space = FALSE; /* On compressor/swap exhaustion, kill the largest process regardless of
					    * its chosen process policy. Controlled by a boot-arg of the same name. */
#endif /* DEVELOPMENT || DEBUG */


static uint32_t	no_paging_space_action_in_progress = 0;
extern void memorystatus_send_low_swap_note(void);

static void
vm_compressor_take_paging_space_action(void)
{
	if (no_paging_space_action_in_progress == 0) {

		if (OSCompareAndSwap(0, 1, (UInt32 *)&no_paging_space_action_in_progress)) {

			if (no_paging_space_action()) {
#if DEVELOPMENT || DEBUG
				if (kill_on_no_paging_space == TRUE) {
					/*
					 * Since we are choosing to always kill a process, we don't need the
					 * "out of application memory" dialog box in this mode. And, hence we won't
					 * send the knote.
					 */
					no_paging_space_action_in_progress = 0;
					return;
				}
#endif /* DEVELOPMENT || DEBUG */
				memorystatus_send_low_swap_note();
			}

			no_paging_space_action_in_progress = 0;
		}
	}
}
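/*
 * no_paging_space_action_in_progress acts as a single-admission gate: the
 * OSCompareAndSwap above lets exactly one thread run no_paging_space_action()
 * at a time, and the flag is cleared once the action (and the optional low
 * swap note) has been issued.
 */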


void
vm_compressor_init_locks(void)
{
	lck_grp_attr_setdefault(&vm_compressor_lck_grp_attr);
	lck_grp_init(&vm_compressor_lck_grp, "vm_compressor", &vm_compressor_lck_grp_attr);
	lck_attr_setdefault(&vm_compressor_lck_attr);

	lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
}


void
vm_decompressor_lock(void)
{
	PAGE_REPLACEMENT_ALLOWED(TRUE);

	decompressions_blocked = TRUE;

	PAGE_REPLACEMENT_ALLOWED(FALSE);
}

void
vm_decompressor_unlock(void)
{
	PAGE_REPLACEMENT_ALLOWED(TRUE);

	decompressions_blocked = FALSE;

	PAGE_REPLACEMENT_ALLOWED(FALSE);

	thread_wakeup((event_t)&decompressions_blocked);
}

static inline void cslot_copy(c_slot_t cdst, c_slot_t csrc) {
#if CHECKSUM_THE_DATA
	cdst->c_hash_data = csrc->c_hash_data;
#endif
#if CHECKSUM_THE_COMPRESSED_DATA
	cdst->c_hash_compressed_data = csrc->c_hash_compressed_data;
#endif
	cdst->c_size = csrc->c_size;
	cdst->c_packed_ptr = csrc->c_packed_ptr;
}

vm_map_t compressor_map;

void
vm_compressor_init(void)
{
	thread_t	thread;
	struct c_slot	cs_dummy;
	c_slot_t	cs = &cs_dummy;
	int		c_segment_min_size;
	int		c_segment_padded_size;
	kern_return_t	retval = KERN_SUCCESS;
	vm_offset_t	start_addr = 0;
	vm_size_t	c_segments_arr_size = 0, compressor_submap_size = 0;
#if RECORD_THE_COMPRESSED_DATA
	vm_size_t	c_compressed_record_sbuf_size = 0;
#endif /* RECORD_THE_COMPRESSED_DATA */

#if DEVELOPMENT || DEBUG
	char bootarg_name[32];
	if (PE_parse_boot_argn("-kill_on_no_paging_space", bootarg_name, sizeof (bootarg_name))) {
		kill_on_no_paging_space = TRUE;
	}
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * ensure that any pointer that gets created from
	 * the vm_page zone can be packed properly
	 */
	cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_min_address);

	if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_min_address)
		panic("C_SLOT_UNPACK_PTR failed on zone_map_min_address - %p", (void *)zone_map_min_address);

	cs->c_packed_ptr = C_SLOT_PACK_PTR(zone_map_max_address);

	if (C_SLOT_UNPACK_PTR(cs) != (uintptr_t)zone_map_max_address)
		panic("C_SLOT_UNPACK_PTR failed on zone_map_max_address - %p", (void *)zone_map_max_address);


	assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE);

	PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof (vm_compression_limit));

	if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) {
		vm_compressor_minorcompact_threshold_divisor = 11;
		vm_compressor_majorcompact_threshold_divisor = 13;
		vm_compressor_unthrottle_threshold_divisor = 20;
		vm_compressor_catchup_threshold_divisor = 35;
	} else {
		vm_compressor_minorcompact_threshold_divisor = 20;
		vm_compressor_majorcompact_threshold_divisor = 25;
		vm_compressor_unthrottle_threshold_divisor = 35;
		vm_compressor_catchup_threshold_divisor = 50;
	}
	/*
	 * vm_page_init_lck_grp is now responsible for calling vm_compressor_init_locks
	 * c_master_lock needs to be available early so that "vm_page_find_contiguous" can
	 * use PAGE_REPLACEMENT_ALLOWED to coordinate with the compressor.
	 */

	c_list_lock = lck_mtx_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr);

	queue_init(&c_bad_list_head);
	queue_init(&c_age_list_head);
	queue_init(&c_minor_list_head);
	queue_init(&c_major_list_head);
	queue_init(&c_filling_list_head);
	queue_init(&c_swapout_list_head);
	queue_init(&c_swappedin_list_head);
	queue_init(&c_swappedout_list_head);
	queue_init(&c_swappedout_sparse_list_head);

	c_segment_min_size = sizeof(struct c_segment) + (C_SEG_SLOT_VAR_ARRAY_MIN_LEN * sizeof(struct c_slot));

	for (c_segment_padded_size = 128; c_segment_padded_size < c_segment_min_size; c_segment_padded_size = c_segment_padded_size << 1);

	compressor_segment_zone = zinit(c_segment_padded_size, 128000 * c_segment_padded_size, PAGE_SIZE, "compressor_segment");
	zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE);
	zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE);

	c_seg_fixed_array_len = (c_segment_padded_size - sizeof(struct c_segment)) / sizeof(struct c_slot);

	c_free_segno_head = -1;
	c_segments_available = 0;

	if (vm_compression_limit == 0) {
		c_segment_pages_compressed_limit = (uint32_t)((max_mem / PAGE_SIZE)) * vm_scale;

#define	OLD_SWAP_LIMIT	(1024 * 1024 * 16)
#define MAX_SWAP_LIMIT	(1024 * 1024 * 128)

		if (c_segment_pages_compressed_limit > (OLD_SWAP_LIMIT))
			c_segment_pages_compressed_limit = OLD_SWAP_LIMIT;

		if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE_64))
			c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE_64);
	} else {
		if (vm_compression_limit < MAX_SWAP_LIMIT)
			c_segment_pages_compressed_limit = vm_compression_limit;
		else
			c_segment_pages_compressed_limit = MAX_SWAP_LIMIT;
	}
	if ((c_segments_limit = c_segment_pages_compressed_limit / (C_SEG_BUFSIZE / PAGE_SIZE)) > C_SEG_MAX_LIMIT)
		c_segments_limit = C_SEG_MAX_LIMIT;

	c_segment_pages_compressed_nearing_limit = (c_segment_pages_compressed_limit * 98) / 100;
	c_segments_nearing_limit = (c_segments_limit * 98) / 100;

	c_segments_busy = FALSE;

	/*
	 * Submap needs space for:
	 * - c_segments
	 * - c_buffers
	 * - swap reclamations -- C_SEG_BUFSIZE
	 */
	c_segments_arr_size = vm_map_round_page((sizeof(union c_segu) * c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));
	c_buffers_size = vm_map_round_page(((vm_size_t)C_SEG_ALLOCSIZE * (vm_size_t)c_segments_limit), VM_MAP_PAGE_MASK(kernel_map));

	compressor_submap_size = c_segments_arr_size + c_buffers_size + C_SEG_BUFSIZE;

#if RECORD_THE_COMPRESSED_DATA
	c_compressed_record_sbuf_size = (vm_size_t)C_SEG_ALLOCSIZE + (PAGE_SIZE * 2);
	compressor_submap_size += c_compressed_record_sbuf_size;
#endif /* RECORD_THE_COMPRESSED_DATA */

	retval = kmem_suballoc(kernel_map, &start_addr, compressor_submap_size,
			       FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT | VM_MAKE_TAG(0),
			       &compressor_map);

	if (retval != KERN_SUCCESS)
		panic("vm_compressor_init: kmem_suballoc failed");

	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
		panic("vm_compressor_init: kernel_memory_allocate failed - c_segments\n");
	if (kernel_memory_allocate(compressor_map, &c_buffers, c_buffers_size, 0, KMA_COMPRESSOR | KMA_VAONLY | KMA_PERMANENT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
		panic("vm_compressor_init: kernel_memory_allocate failed - c_buffers\n");

	c_segments_next_page = (caddr_t)c_segments;
	vm_compressor_algorithm_init();

	{
		host_basic_info_data_t hinfo;
		mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;

#define BSD_HOST 1
		host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);

		compressor_cpus = hinfo.max_cpus;
		compressor_scratch_bufs = kalloc_tag(compressor_cpus * vm_compressor_get_decode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);

		kdp_compressor_scratch_buf = kalloc_tag(vm_compressor_get_decode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
		kdp_compressor_decompressed_page = kalloc_tag(PAGE_SIZE, VM_KERN_MEMORY_COMPRESSOR);
		kdp_compressor_decompressed_page_paddr = kvtophys((vm_offset_t)kdp_compressor_decompressed_page);
		kdp_compressor_decompressed_page_ppnum = (ppnum_t) atop(kdp_compressor_decompressed_page_paddr);
	}
#if CONFIG_FREEZE
	freezer_compressor_scratch_buf = kalloc_tag(vm_compressor_get_encode_scratch_size(), VM_KERN_MEMORY_COMPRESSOR);
#endif

#if RECORD_THE_COMPRESSED_DATA
	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)&c_compressed_record_sbuf, c_compressed_record_sbuf_size, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS)
		panic("vm_compressor_init: kernel_memory_allocate failed - c_compressed_record_sbuf\n");

	c_compressed_record_cptr = c_compressed_record_sbuf;
	c_compressed_record_ebuf = c_compressed_record_sbuf + c_compressed_record_sbuf_size;
#endif

	if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_compressor_swap_trigger_thread: create failed");
	}
	thread_deallocate(thread);

	if (vm_pageout_internal_start() != KERN_SUCCESS) {
		panic("vm_compressor_init: Failed to start the internal pageout thread.\n");
	}
	if (VM_CONFIG_SWAP_IS_PRESENT)
		vm_compressor_swap_init();

	if (VM_CONFIG_COMPRESSOR_IS_ACTIVE)
		vm_compressor_is_active = 1;

#if CONFIG_FREEZE
	memorystatus_freeze_enabled = TRUE;
#endif /* CONFIG_FREEZE */

	vm_compressor_available = 1;

	vm_page_reactivate_all_throttled();
}


#if VALIDATE_C_SEGMENTS

static void
c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact)
{
	int		c_indx;
	int32_t		bytes_used;
	int32_t		bytes_unused;
	uint32_t	c_rounded_size;
	uint32_t	c_size;
	c_slot_t	cs;

	if (c_seg->c_firstemptyslot < c_seg->c_nextslot) {
		c_indx = c_seg->c_firstemptyslot;
		cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

		if (cs == NULL)
			panic("c_seg_validate: no slot backing c_firstemptyslot");

		if (cs->c_size)
			panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size);
	}
	bytes_used = 0;
	bytes_unused = 0;

	for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) {

		cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

		c_size = UNPACK_C_SIZE(cs);

		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

		bytes_used += c_rounded_size;

#if CHECKSUM_THE_COMPRESSED_DATA
		if (c_size && cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))
			panic("compressed data doesn't match original");
#endif
	}

	if (bytes_used != c_seg->c_bytes_used)
		panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used);

	if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
		panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
		      (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);

	if (must_be_compact) {
		if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset))
			panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n",
			      (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used);
	}
}

#endif


void
c_seg_need_delayed_compaction(c_segment_t c_seg, boolean_t c_list_lock_held)
{
	boolean_t	clear_busy = FALSE;

	if (c_list_lock_held == FALSE) {
		if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
			C_SEG_BUSY(c_seg);

			lck_mtx_unlock_always(&c_seg->c_lock);
			lck_mtx_lock_spin_always(c_list_lock);
			lck_mtx_lock_spin_always(&c_seg->c_lock);

			clear_busy = TRUE;
		}
	}
	assert(c_seg->c_state != C_IS_FILLING);

	if (!c_seg->c_on_minorcompact_q && !(C_SEG_IS_ONDISK(c_seg))) {
		queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list);
		c_seg->c_on_minorcompact_q = 1;
		c_minor_count++;
	}
	if (c_list_lock_held == FALSE)
		lck_mtx_unlock_always(c_list_lock);

	if (clear_busy == TRUE)
		C_SEG_WAKEUP_DONE(c_seg);
}


unsigned int c_seg_moved_to_sparse_list = 0;

void
c_seg_move_to_sparse_list(c_segment_t c_seg)
{
	boolean_t	clear_busy = FALSE;

	if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
		C_SEG_BUSY(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		clear_busy = TRUE;
	}
	c_seg_switch_state(c_seg, C_ON_SWAPPEDOUTSPARSE_Q, FALSE);

	c_seg_moved_to_sparse_list++;

	lck_mtx_unlock_always(c_list_lock);

	if (clear_busy == TRUE)
		C_SEG_WAKEUP_DONE(c_seg);
}


void
c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg)
{
	c_segment_t c_seg_next;

	if (queue_empty(qhead)) {
		queue_enter(qhead, c_seg, c_segment_t, c_age_list);
	} else {
		c_seg_next = (c_segment_t)queue_first(qhead);

		while (TRUE) {

			if (c_seg->c_generation_id < c_seg_next->c_generation_id) {
				queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list);
				break;
			}
			c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list);

			if (queue_end(qhead, (queue_entry_t) c_seg_next)) {
				queue_enter(qhead, c_seg, c_segment_t, c_age_list);
				break;
			}
		}
	}
}
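/*
 * c_seg_insert_into_q keeps the target queue ordered by ascending
 * c_generation_id: the new segment is placed in front of the first entry
 * with a larger generation id, or at the tail if no such entry exists.
 */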


int try_minor_compaction_failed = 0;
int try_minor_compaction_succeeded = 0;

void
c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg)
{

	assert(c_seg->c_on_minorcompact_q);
	/*
	 * c_seg is currently on the delayed minor compaction
	 * queue and we have c_seg locked... if we can get the
	 * c_list_lock w/o blocking (if we blocked we could deadlock
	 * because the lock order is c_list_lock then c_seg's lock)
	 * we'll pull it from the delayed list and free it directly
	 */
	if ( !lck_mtx_try_lock_spin_always(c_list_lock)) {
		/*
		 * c_list_lock is held, we need to bail
		 */
		try_minor_compaction_failed++;

		lck_mtx_unlock_always(&c_seg->c_lock);
	} else {
		try_minor_compaction_succeeded++;

		C_SEG_BUSY(c_seg);
		c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE);
	}
}


int
c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement)
{
	int	c_seg_freed;

	assert(c_seg->c_busy);

	/*
	 * check for the case that can occur when we are not swapping
	 * and this segment has been major compacted in the past
	 * and moved to the majorcompact q to remove it from further
	 * consideration... if the occupancy falls too low we need
	 * to put it back on the age_q so that it will be considered
	 * in the next major compaction sweep... if we don't do this
	 * we will eventually run into the c_segments_limit
	 */
	if (c_seg->c_state == C_ON_MAJORCOMPACT_Q && C_SEG_SHOULD_MAJORCOMPACT(c_seg)) {

		c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
	}
	if (!c_seg->c_on_minorcompact_q) {
		if (clear_busy == TRUE)
			C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);

		return (0);
	}
	queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list);
	c_seg->c_on_minorcompact_q = 0;
	c_minor_count--;

	lck_mtx_unlock_always(c_list_lock);

	if (disallow_page_replacement == TRUE) {
		lck_mtx_unlock_always(&c_seg->c_lock);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(&c_seg->c_lock);
	}
	c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy);

	if (disallow_page_replacement == TRUE)
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

	if (need_list_lock == TRUE)
		lck_mtx_lock_spin_always(c_list_lock);

	return (c_seg_freed);
}


void
c_seg_wait_on_busy(c_segment_t c_seg)
{
	c_seg->c_wanted = 1;
	assert_wait((event_t) (c_seg), THREAD_UNINT);

	lck_mtx_unlock_always(&c_seg->c_lock);
	thread_block(THREAD_CONTINUE_NULL);
}


void
c_seg_switch_state(c_segment_t c_seg, int new_state, boolean_t insert_head)
{
	int	old_state = c_seg->c_state;

#if __i386__ || __x86_64__
	if (new_state != C_IS_FILLING)
		LCK_MTX_ASSERT(&c_seg->c_lock, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
#endif
	switch (old_state) {

	case C_IS_EMPTY:
		assert(new_state == C_IS_FILLING || new_state == C_IS_FREE);

		c_empty_count--;
		break;

	case C_IS_FILLING:
		assert(new_state == C_ON_AGE_Q || new_state == C_ON_SWAPOUT_Q);

		queue_remove(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
		c_filling_count--;
		break;

	case C_ON_AGE_Q:
		assert(new_state == C_ON_SWAPOUT_Q || new_state == C_ON_MAJORCOMPACT_Q ||
		       new_state == C_IS_FREE);

		queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		c_age_count--;
		break;

	case C_ON_SWAPPEDIN_Q:
		assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);

		queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedin_count--;
		break;

	case C_ON_SWAPOUT_Q:
		assert(new_state == C_ON_SWAPPEDOUT_Q || new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
		       new_state == C_ON_AGE_Q || new_state == C_IS_FREE || new_state == C_IS_EMPTY);

		queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		thread_wakeup((event_t)&compaction_swapper_running);
		c_swapout_count--;
		break;

	case C_ON_SWAPPEDOUT_Q:
		assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
		       new_state == C_ON_SWAPPEDOUTSPARSE_Q ||
		       new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);

		queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedout_count--;
		break;

	case C_ON_SWAPPEDOUTSPARSE_Q:
		assert(new_state == C_ON_SWAPPEDIN_Q || new_state == C_ON_AGE_Q ||
		       new_state == C_ON_BAD_Q || new_state == C_IS_EMPTY || new_state == C_IS_FREE);

		queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedout_sparse_count--;
		break;

	case C_ON_MAJORCOMPACT_Q:
		assert(new_state == C_ON_AGE_Q || new_state == C_IS_FREE);

		queue_remove(&c_major_list_head, c_seg, c_segment_t, c_age_list);
		c_major_count--;
		break;

	case C_ON_BAD_Q:
		assert(new_state == C_IS_FREE);

		queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		c_bad_count--;
		break;

	default:
		panic("c_seg %p has bad c_state = %d\n", c_seg, old_state);
	}

	switch(new_state) {
	case C_IS_FREE:
		assert(old_state != C_IS_FILLING);

		break;

	case C_IS_EMPTY:
		assert(old_state == C_ON_SWAPOUT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);

		c_empty_count++;
		break;

	case C_IS_FILLING:
		assert(old_state == C_IS_EMPTY);

		queue_enter(&c_filling_list_head, c_seg, c_segment_t, c_age_list);
		c_filling_count++;
		break;

	case C_ON_AGE_Q:
		assert(old_state == C_IS_FILLING || old_state == C_ON_SWAPPEDIN_Q || old_state == C_ON_SWAPOUT_Q ||
		       old_state == C_ON_MAJORCOMPACT_Q || old_state == C_ON_SWAPPEDOUT_Q || old_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (old_state == C_IS_FILLING)
			queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		else {
			if (!queue_empty(&c_age_list_head)) {
				c_segment_t	c_first;

				c_first = (c_segment_t)queue_first(&c_age_list_head);
				c_seg->c_creation_ts = c_first->c_creation_ts;
			}
			queue_enter_first(&c_age_list_head, c_seg, c_segment_t, c_age_list);
		}
		c_age_count++;
		break;

	case C_ON_SWAPPEDIN_Q:
		assert(c_seg->c_state == C_ON_SWAPPEDOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (insert_head == TRUE)
			queue_enter_first(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		else
			queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedin_count++;
		break;

	case C_ON_SWAPOUT_Q:
		assert(old_state == C_ON_AGE_Q || old_state == C_IS_FILLING);

		if (insert_head == TRUE)
			queue_enter_first(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		else
			queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
		c_swapout_count++;
		break;

	case C_ON_SWAPPEDOUT_Q:
		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (insert_head == TRUE)
			queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		else
			queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
		c_swappedout_count++;
		break;

	case C_ON_SWAPPEDOUTSPARSE_Q:
		assert(c_seg->c_state == C_ON_SWAPOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUT_Q);

		if (insert_head == TRUE)
			queue_enter_first(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);
		else
			queue_enter(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list);

		c_swappedout_sparse_count++;
		break;

	case C_ON_MAJORCOMPACT_Q:
		assert(c_seg->c_state == C_ON_AGE_Q);

		if (insert_head == TRUE)
			queue_enter_first(&c_major_list_head, c_seg, c_segment_t, c_age_list);
		else
			queue_enter(&c_major_list_head, c_seg, c_segment_t, c_age_list);
		c_major_count++;
		break;

	case C_ON_BAD_Q:
		assert(c_seg->c_state == C_ON_SWAPPEDOUT_Q || c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (insert_head == TRUE)
			queue_enter_first(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		else
			queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list);
		c_bad_count++;
		break;

	default:
		panic("c_seg %p requesting bad c_state = %d\n", c_seg, new_state);
	}
	c_seg->c_state = new_state;
}
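/*
 * Most compressor segment states correspond to one queue and one count
 * (c_age_count, c_swapout_count, ...).  c_seg_switch_state dequeues the
 * segment from its old state's queue, enqueues it on the new one (at the
 * head when insert_head is TRUE), and keeps the counts in sync; the
 * assertions document which transitions are legal.  The caller must hold
 * c_list_lock, and the segment lock for every transition except into
 * C_IS_FILLING.
 */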



void
c_seg_free(c_segment_t c_seg)
{
	assert(c_seg->c_busy);

	lck_mtx_unlock_always(&c_seg->c_lock);
	lck_mtx_lock_spin_always(c_list_lock);
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	c_seg_free_locked(c_seg);
}


void
c_seg_free_locked(c_segment_t c_seg)
{
	int		segno;
	int		pages_populated = 0;
	int32_t		*c_buffer = NULL;
	uint64_t	c_swap_handle = 0;

	assert(c_seg->c_busy);
	assert(!c_seg->c_on_minorcompact_q);
	assert(!c_seg->c_busy_swapping);

	if (c_seg->c_overage_swap == TRUE) {
		c_overage_swapped_count--;
		c_seg->c_overage_swap = FALSE;
	}
	if ( !(C_SEG_IS_ONDISK(c_seg)))
		c_buffer = c_seg->c_store.c_buffer;
	else
		c_swap_handle = c_seg->c_store.c_swap_handle;

	c_seg_switch_state(c_seg, C_IS_FREE, FALSE);

	lck_mtx_unlock_always(c_list_lock);

	if (c_buffer) {
		pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
		c_seg->c_store.c_buffer = NULL;
	} else
		c_seg->c_store.c_swap_handle = (uint64_t)-1;

	lck_mtx_unlock_always(&c_seg->c_lock);

	if (c_buffer) {
		if (pages_populated)
			kernel_memory_depopulate(compressor_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);

	} else if (c_swap_handle) {
		/*
		 * Free swap space on disk.
		 */
		vm_swap_free(c_swap_handle);
	}
	lck_mtx_lock_spin_always(&c_seg->c_lock);

	C_SEG_WAKEUP_DONE(c_seg);
	lck_mtx_unlock_always(&c_seg->c_lock);

	segno = c_seg->c_mysegno;

	lck_mtx_lock_spin_always(c_list_lock);
	/*
	 * because the c_buffer is now associated with the segno,
	 * we can't put the segno back on the free list until
	 * after we have depopulated the c_buffer range, or
	 * we run the risk of depopulating a range that is
	 * now being used in one of the compressor heads
	 */
	c_segments[segno].c_segno = c_free_segno_head;
	c_free_segno_head = segno;
	c_segment_count--;

	lck_mtx_unlock_always(c_list_lock);

	lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp);

	if (c_seg->c_slot_var_array_len)
		kfree(c_seg->c_slot_var_array, sizeof(struct c_slot) * c_seg->c_slot_var_array_len);

	zfree(compressor_segment_zone, c_seg);
}


int c_seg_trim_page_count = 0;

void
c_seg_trim_tail(c_segment_t c_seg)
{
	c_slot_t	cs;
	uint32_t	c_size;
	uint32_t	c_offset;
	uint32_t	c_rounded_size;
	uint16_t	current_nextslot;
	uint32_t	current_populated_offset;

	if (c_seg->c_bytes_used == 0)
		return;
	current_nextslot = c_seg->c_nextslot;
	current_populated_offset = c_seg->c_populated_offset;

	while (c_seg->c_nextslot) {

		cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1));

		c_size = UNPACK_C_SIZE(cs);

		if (c_size) {
			if (current_nextslot != c_seg->c_nextslot) {
				c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
				c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size);

				c_seg->c_nextoffset = c_offset;
				c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);

				if (c_seg->c_firstemptyslot > c_seg->c_nextslot)
					c_seg->c_firstemptyslot = c_seg->c_nextslot;

				c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) -
							   round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE);
			}
			break;
		}
		c_seg->c_nextslot--;
	}
	assert(c_seg->c_nextslot);
}
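/*
 * c_seg_trim_tail walks backwards from the last slot, dropping empty
 * trailing slots, and then pulls c_nextoffset / c_populated_offset back to
 * just past the last slot that still holds data, so the now-unused tail
 * pages no longer count as populated (tracked in c_seg_trim_page_count).
 */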


int
c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy)
{
	c_slot_mapping_t slot_ptr;
	uint32_t	c_offset = 0;
	uint32_t	old_populated_offset;
	uint32_t	c_rounded_size;
	uint32_t	c_size;
	int		c_indx = 0;
	int		i;
	c_slot_t	c_dst;
	c_slot_t	c_src;

	assert(c_seg->c_busy);

#if VALIDATE_C_SEGMENTS
	c_seg_validate(c_seg, FALSE);
#endif
	if (c_seg->c_bytes_used == 0) {
		c_seg_free(c_seg);
		return (1);
	}
	lck_mtx_unlock_always(&c_seg->c_lock);

	if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE)
		goto done;

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif

#if VALIDATE_C_SEGMENTS
	c_seg->c_was_minor_compacted++;
#endif
	c_indx = c_seg->c_firstemptyslot;
	c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);

	old_populated_offset = c_seg->c_populated_offset;
	c_offset = c_dst->c_offset;

	for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) {

		c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i);

		c_size = UNPACK_C_SIZE(c_src);

		if (c_size == 0)
			continue;

		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

		memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_rounded_size);

		cslot_copy(c_dst, c_src);
		c_dst->c_offset = c_offset;

		slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
		slot_ptr->s_cindx = c_indx;

		c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
		PACK_C_SIZE(c_src, 0);
		c_indx++;

		c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
	}
	c_seg->c_firstemptyslot = c_indx;
	c_seg->c_nextslot = c_indx;
	c_seg->c_nextoffset = c_offset;
	c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1);
	c_seg->c_bytes_unused = 0;

#if VALIDATE_C_SEGMENTS
	c_seg_validate(c_seg, TRUE);
#endif
	if (old_populated_offset > c_seg->c_populated_offset) {
		uint32_t	gc_size;
		int32_t		*gc_ptr;

		gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset);
		gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset];

		kernel_memory_depopulate(compressor_map, (vm_offset_t)gc_ptr, gc_size, KMA_COMPRESSOR);
	}

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif

done:
	if (clear_busy == TRUE) {
		lck_mtx_lock_spin_always(&c_seg->c_lock);
		C_SEG_WAKEUP_DONE(c_seg);
		lck_mtx_unlock_always(&c_seg->c_lock);
	}
	return (0);
}
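/*
 * Minor compaction is performed in place: live slots are slid down over the
 * holes left by freed ones (starting at c_firstemptyslot), each moved slot's
 * back-pointer is updated to its new index, and any buffer pages that fall
 * beyond the new populated offset are depopulated and returned to the
 * system.  A segment whose c_bytes_used has dropped to zero is simply freed.
 */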


static void
c_seg_alloc_nextslot(c_segment_t c_seg)
{
	struct c_slot	*old_slot_array = NULL;
	struct c_slot	*new_slot_array = NULL;
	int		newlen;
	int		oldlen;

	if (c_seg->c_nextslot < c_seg_fixed_array_len)
		return;

	if ((c_seg->c_nextslot - c_seg_fixed_array_len) >= c_seg->c_slot_var_array_len) {

		oldlen = c_seg->c_slot_var_array_len;
		old_slot_array = c_seg->c_slot_var_array;

		if (oldlen == 0)
			newlen = C_SEG_SLOT_VAR_ARRAY_MIN_LEN;
		else
			newlen = oldlen * 2;

		new_slot_array = (struct c_slot *)kalloc(sizeof(struct c_slot) * newlen);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (old_slot_array)
			memcpy((char *)new_slot_array, (char *)old_slot_array, sizeof(struct c_slot) * oldlen);

		c_seg->c_slot_var_array_len = newlen;
		c_seg->c_slot_var_array = new_slot_array;

		lck_mtx_unlock_always(&c_seg->c_lock);

		if (old_slot_array)
			kfree(old_slot_array, sizeof(struct c_slot) * oldlen);
	}
}
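/*
 * Slots beyond the fixed array embedded in the c_segment live in a separate
 * variable-length array that is grown geometrically (doubling from
 * C_SEG_SLOT_VAR_ARRAY_MIN_LEN); the copy into the new array is done with
 * the segment lock held, while the old array is freed after the lock is
 * dropped.
 */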



struct {
	uint64_t asked_permission;
	uint64_t compactions;
	uint64_t moved_slots;
	uint64_t moved_bytes;
	uint64_t wasted_space_in_swapouts;
	uint64_t count_of_swapouts;
	uint64_t count_of_freed_segs;
} c_seg_major_compact_stats;


#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE	((C_SEG_BUFSIZE * 90) / 100)


boolean_t
c_seg_major_compact_ok(
	c_segment_t c_seg_dst,
	c_segment_t c_seg_src)
{

	c_seg_major_compact_stats.asked_permission++;

	if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE &&
	    c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE)
		return (FALSE);

	if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
		/*
		 * destination segment is full... can't compact
		 */
		return (FALSE);
	}

	return (TRUE);
}
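/*
 * C_MAJOR_COMPACTION_SIZE_APPROPRIATE is 90% of C_SEG_BUFSIZE: major
 * compaction of a source segment into a destination is declined when both
 * are already at least 90% full, or when the destination has run out of
 * offsets or slots.
 */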


boolean_t
c_seg_major_compact(
	c_segment_t c_seg_dst,
	c_segment_t c_seg_src)
{
	c_slot_mapping_t slot_ptr;
	uint32_t	c_rounded_size;
	uint32_t	c_size;
	uint16_t	dst_slot;
	int		i;
	c_slot_t	c_dst;
	c_slot_t	c_src;
	boolean_t	keep_compacting = TRUE;

	/*
	 * segments are not locked but they are both marked c_busy
	 * which keeps c_decompress from working on them...
	 * we can safely allocate new pages, move compressed data
	 * from c_seg_src to c_seg_dst and update both c_segment's
	 * state w/o holding the master lock
	 */
#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg_dst);
#endif

#if VALIDATE_C_SEGMENTS
	c_seg_dst->c_was_major_compacted++;
	c_seg_src->c_was_major_donor++;
#endif
	c_seg_major_compact_stats.compactions++;

	dst_slot = c_seg_dst->c_nextslot;

	for (i = 0; i < c_seg_src->c_nextslot; i++) {

		c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i);

		c_size = UNPACK_C_SIZE(c_src);

		if (c_size == 0) {
			/* BATCH: move what we have so far; */
			continue;
		}

		if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) {
			int	size_to_populate;

			/* doesn't fit */
			size_to_populate = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset);

			if (size_to_populate == 0) {
				/* can't fit */
				keep_compacting = FALSE;
				break;
			}
			if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
				size_to_populate = C_SEG_MAX_POPULATE_SIZE;

			kernel_memory_populate(compressor_map,
					       (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset],
					       size_to_populate,
					       KMA_COMPRESSOR,
					       VM_KERN_MEMORY_COMPRESSOR);

			c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
			assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE);
		}
		c_seg_alloc_nextslot(c_seg_dst);

		c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);

		memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);

		c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;

		c_seg_major_compact_stats.moved_slots++;
		c_seg_major_compact_stats.moved_bytes += c_size;

		cslot_copy(c_dst, c_src);
		c_dst->c_offset = c_seg_dst->c_nextoffset;

		if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
			c_seg_dst->c_firstemptyslot++;
		c_seg_dst->c_nextslot++;
		c_seg_dst->c_bytes_used += c_rounded_size;
		c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);

		PACK_C_SIZE(c_src, 0);

		c_seg_src->c_bytes_used -= c_rounded_size;
		c_seg_src->c_bytes_unused += c_rounded_size;
		c_seg_src->c_firstemptyslot = 0;

		if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
			/* dest segment is now full */
			keep_compacting = FALSE;
			break;
		}
	}
#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg_dst);
#endif
	if (dst_slot < c_seg_dst->c_nextslot) {

		PAGE_REPLACEMENT_ALLOWED(TRUE);
		/*
		 * we've now locked out c_decompress from
		 * converting the slot passed into it into
		 * a c_segment_t which allows us to use
		 * the backptr to change which c_segment and
		 * index the slot points to
		 */
		while (dst_slot < c_seg_dst->c_nextslot) {

			c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);

			slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
			/* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
			slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
			slot_ptr->s_cindx = dst_slot++;
		}
		PAGE_REPLACEMENT_ALLOWED(FALSE);
	}
	return (keep_compacting);
}


uint64_t
vm_compressor_compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec)
{
	uint64_t end_msecs;
	uint64_t start_msecs;

	end_msecs = (end_sec * 1000) + end_nsec / 1000000;
	start_msecs = (start_sec * 1000) + start_nsec / 1000000;

	return (end_msecs - start_msecs);
}



uint32_t compressor_eval_period_in_msecs = 250;
uint32_t compressor_sample_min_in_msecs = 500;
uint32_t compressor_sample_max_in_msecs = 10000;
uint32_t compressor_thrashing_threshold_per_10msecs = 50;
uint32_t compressor_thrashing_min_per_10msecs = 20;

/* When true, reset sample data next chance we get. */
static boolean_t	compressor_need_sample_reset = FALSE;

extern uint32_t vm_page_filecache_min;


void
compute_swapout_target_age(void)
{
	clock_sec_t	cur_ts_sec;
	clock_nsec_t	cur_ts_nsec;
	uint32_t	min_operations_needed_in_this_sample;
	uint64_t	elapsed_msecs_in_eval;
	uint64_t	elapsed_msecs_in_sample;
	boolean_t	need_eval_reset = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_sample = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec);

	if (compressor_need_sample_reset ||
	    elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) {
		compressor_need_sample_reset = TRUE;
		need_eval_reset = TRUE;
		goto done;
	}
	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec);

	if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs)
		goto done;
	need_eval_reset = TRUE;

	KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0);

	min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10;

	if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample ||
	    (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) {

		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count,
			     sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0);

		swapout_target_age = 0;

		compressor_need_sample_reset = TRUE;
		need_eval_reset = TRUE;
		goto done;
	}
	last_eval_compression_count = sample_period_compression_count;
	last_eval_decompression_count = sample_period_decompression_count;

	if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) {

		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0);
		goto done;
	}
	if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) {

		uint64_t	running_total;
		uint64_t	working_target;
		uint64_t	aging_target;
		uint32_t	oldest_age_of_csegs_sampled = 0;
		uint64_t	working_set_approximation = 0;

		swapout_target_age = 0;

		working_target = (sample_period_decompression_count / 100) * 95;		/* 95 percent */
		aging_target = (sample_period_decompression_count / 100) * 1;			/* 1 percent */
		running_total = 0;

		for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) {

			running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];

			working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];

			if (running_total >= working_target)
				break;
		}
		if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) {

			working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample;

			if (working_set_approximation < VM_PAGE_COMPRESSOR_COUNT) {

				running_total = overage_decompressions_during_sample_period;

				for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) {
					running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled];

					if (running_total >= aging_target)
						break;
				}
				swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled;

				KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0);
			} else {
				KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0);
			}
		} else
			KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0);

		compressor_need_sample_reset = TRUE;
		need_eval_reset = TRUE;
	} else
		KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0);
done:
	if (compressor_need_sample_reset == TRUE) {
		bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period));
		overage_decompressions_during_sample_period = 0;

		start_of_sample_period_sec = cur_ts_sec;
		start_of_sample_period_nsec = cur_ts_nsec;
		sample_period_decompression_count = 0;
		sample_period_compression_count = 0;
		last_eval_decompression_count = 0;
		last_eval_compression_count = 0;
		compressor_need_sample_reset = FALSE;
	}
	if (need_eval_reset == TRUE) {
		start_of_eval_period_sec = cur_ts_sec;
		start_of_eval_period_nsec = cur_ts_nsec;
	}
}
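/*
 * Summary of the heuristic above: once enough compression/decompression
 * activity has accumulated in the current evaluation period, and the
 * decompression rate exceeds the thrashing threshold, the code scans the
 * per-age histogram of recent decompressions to find the age that covers
 * ~95% of them (an approximation of the working set).  If that working set
 * would fit in the compressor, swapout_target_age is set so that only
 * segments older than the ~1% coldest tail become candidates for swap-out.
 */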


int		compaction_swapper_init_now = 0;
int		compaction_swapper_running = 0;
int		compaction_swapper_awakened = 0;
int		compaction_swapper_abort = 0;


#if CONFIG_JETSAM
boolean_t	memorystatus_kill_on_VM_thrashing(boolean_t);
boolean_t	memorystatus_kill_on_FC_thrashing(boolean_t);
int		compressor_thrashing_induced_jetsam = 0;
int		filecache_thrashing_induced_jetsam = 0;
static boolean_t	vm_compressor_thrashing_detected = FALSE;
#endif /* CONFIG_JETSAM */

static boolean_t
compressor_needs_to_swap(void)
{
	boolean_t	should_swap = FALSE;

	if (vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit) {
		c_segment_t	c_seg;
		clock_sec_t	now;
		clock_sec_t	age;
		clock_nsec_t	nsec;

		clock_get_system_nanotime(&now,  &nsec);
		age = 0;

		lck_mtx_lock_spin_always(c_list_lock);

		if ( !queue_empty(&c_age_list_head)) {
			c_seg = (c_segment_t) queue_first(&c_age_list_head);

			age = now - c_seg->c_creation_ts;
		}
		lck_mtx_unlock_always(c_list_lock);

		if (age >= vm_ripe_target_age)
			return (TRUE);
	}
	if (VM_CONFIG_SWAP_IS_ACTIVE) {
		if (COMPRESSOR_NEEDS_TO_SWAP()) {
			return (TRUE);
		}
		if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) {
			return (TRUE);
		}
		if (vm_page_free_count < (vm_page_free_reserved - (COMPRESSOR_FREE_RESERVED_LIMIT * 2)))
			return (TRUE);
	}
	compute_swapout_target_age();

	if (swapout_target_age) {
		c_segment_t	c_seg;

		lck_mtx_lock_spin_always(c_list_lock);

		if (!queue_empty(&c_age_list_head)) {

			c_seg = (c_segment_t) queue_first(&c_age_list_head);

			if (c_seg->c_creation_ts > swapout_target_age)
				swapout_target_age = 0;
		}
		lck_mtx_unlock_always(c_list_lock);
	}
#if CONFIG_PHANTOM_CACHE
	if (vm_phantom_cache_check_pressure())
		should_swap = TRUE;
#endif
	if (swapout_target_age)
		should_swap = TRUE;

#if CONFIG_JETSAM
	if (should_swap || c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) {

		if (vm_compressor_thrashing_detected == FALSE) {
			vm_compressor_thrashing_detected = TRUE;

			if (swapout_target_age || c_segment_pages_compressed > c_segment_pages_compressed_nearing_limit) {
				memorystatus_kill_on_VM_thrashing(TRUE /* async */);
				compressor_thrashing_induced_jetsam++;
			} else {
				memorystatus_kill_on_FC_thrashing(TRUE /* async */);
				filecache_thrashing_induced_jetsam++;
			}
		}
		/*
		 * let the jetsam take precedence over
		 * any major compactions we might have
		 * been able to do... otherwise we run
		 * the risk of doing major compactions
		 * on segments we're about to free up
		 * due to the jetsam activity.
		 */
		should_swap = FALSE;
	}

#endif /* CONFIG_JETSAM */

	if (should_swap == FALSE) {
		/*
		 * vm_compressor_needs_to_major_compact returns true only if we're
		 * about to run out of available compressor segments... in this
		 * case, we absolutely need to run a major compaction even if
		 * we've just kicked off a jetsam or we don't otherwise need to
		 * swap... terminating objects releases
		 * pages back to the uncompressed cache, but does not guarantee
		 * that we will free up even a single compression segment
		 */
		should_swap = vm_compressor_needs_to_major_compact();
	}

	/*
	 * returning TRUE when swap_supported == FALSE
	 * will cause the major compaction engine to
	 * run, but will not trigger any swapping...
	 * segments that have been major compacted
	 * will be moved to the majorcompact queue
	 */
	return (should_swap);
}

#if CONFIG_JETSAM
/*
 * This function is called from the jetsam thread after killing something to
 * mitigate thrashing.
 *
 * We need to restart our thrashing detection heuristics since memory pressure
 * has potentially changed significantly, and we don't want to detect on old
 * data from before the jetsam.
 */
void
vm_thrashing_jetsam_done(void)
{
	vm_compressor_thrashing_detected = FALSE;

	/* Were we compressor-thrashing or filecache-thrashing? */
	if (swapout_target_age) {
		swapout_target_age = 0;
		compressor_need_sample_reset = TRUE;
	}
#if CONFIG_PHANTOM_CACHE
	else {
		vm_phantom_cache_restart_sample();
	}
#endif
}
#endif /* CONFIG_JETSAM */

uint32_t vm_wake_compactor_swapper_calls = 0;
uint32_t vm_run_compactor_already_running = 0;
uint32_t vm_run_compactor_empty_minor_q = 0;
uint32_t vm_run_compactor_did_compact = 0;
1767uint32_t vm_run_compactor_waited = 0;
1768
1769void
1770vm_run_compactor(void)
1771{
1772 if (c_segment_count == 0)
1773 return;
1774
1775 lck_mtx_lock_spin_always(c_list_lock);
1776
1777 if (c_minor_count == 0) {
1778 vm_run_compactor_empty_minor_q++;
1779
1780 lck_mtx_unlock_always(c_list_lock);
1781 return;
1782 }
1783 if (compaction_swapper_running) {
1784
1785 if (vm_restricted_to_single_processor == FALSE) {
1786 vm_run_compactor_already_running++;
1787
1788 lck_mtx_unlock_always(c_list_lock);
1789 return;
1790 }
1791 vm_run_compactor_waited++;
1792
1793 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
1794
1795 lck_mtx_unlock_always(c_list_lock);
1796
1797 thread_block(THREAD_CONTINUE_NULL);
1798
1799 return;
1800 }
1801 vm_run_compactor_did_compact++;
1802
1803 fastwake_warmup = FALSE;
1804 compaction_swapper_running = 1;
1805
1806 vm_compressor_do_delayed_compactions(FALSE);
1807
1808 compaction_swapper_running = 0;
1809
1810 lck_mtx_unlock_always(c_list_lock);
1811
1812 thread_wakeup((event_t)&compaction_swapper_running);
1813}
1814
39236c6e
A
1815
1816void
1817vm_wake_compactor_swapper(void)
1818{
39037602 1819 if (compaction_swapper_running || compaction_swapper_awakened || c_segment_count == 0)
39236c6e
A
1820 return;
1821
39037602 1822 if (c_minor_count || vm_compressor_needs_to_major_compact()) {
3e170ce0
A
1823
1824 lck_mtx_lock_spin_always(c_list_lock);
1825
1826 fastwake_warmup = FALSE;
1827
39037602 1828 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
3e170ce0
A
1829
1830 vm_wake_compactor_swapper_calls++;
1831
39037602 1832 compaction_swapper_awakened = 1;
3e170ce0 1833 thread_wakeup((event_t)&c_compressor_swap_trigger);
3e170ce0
A
1834 }
1835 lck_mtx_unlock_always(c_list_lock);
1836 }
1837}
1838
1839
1840void
1841vm_consider_swapping()
1842{
1843 c_segment_t c_seg, c_seg_next;
1844 clock_sec_t now;
1845 clock_nsec_t nsec;
1846
39037602 1847 assert(VM_CONFIG_SWAP_IS_PRESENT);
39236c6e
A
1848
1849 lck_mtx_lock_spin_always(c_list_lock);
1850
3e170ce0 1851 compaction_swapper_abort = 1;
39236c6e 1852
3e170ce0
A
1853 while (compaction_swapper_running) {
1854 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
39236c6e 1855
3e170ce0 1856 lck_mtx_unlock_always(c_list_lock);
39236c6e 1857
3e170ce0
A
1858 thread_block(THREAD_CONTINUE_NULL);
1859
1860 lck_mtx_lock_spin_always(c_list_lock);
1861 }
1862 compaction_swapper_abort = 0;
1863 compaction_swapper_running = 1;
1864
1865 vm_swapout_ripe_segments = TRUE;
1866
1867 if (!queue_empty(&c_major_list_head)) {
1868
1869 clock_get_system_nanotime(&now, &nsec);
1870
1871 c_seg = (c_segment_t)queue_first(&c_major_list_head);
1872
1873 while (!queue_end(&c_major_list_head, (queue_entry_t)c_seg)) {
1874
1875 if (c_overage_swapped_count >= c_overage_swapped_limit)
1876 break;
1877
1878 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
1879
1880 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
1881
1882 lck_mtx_lock_spin_always(&c_seg->c_lock);
1883
1884 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1885
1886 lck_mtx_unlock_always(&c_seg->c_lock);
1887 }
1888 c_seg = c_seg_next;
1889 }
39236c6e 1890 }
3e170ce0
A
1891 vm_compressor_compact_and_swap(FALSE);
1892
1893 compaction_swapper_running = 0;
1894
1895 vm_swapout_ripe_segments = FALSE;
1896
39236c6e 1897 lck_mtx_unlock_always(c_list_lock);
39037602
A
1898
1899 thread_wakeup((event_t)&compaction_swapper_running);
39236c6e
A
1900}
1901
fe8ab488 1902
39236c6e
A
1903void
1904vm_consider_waking_compactor_swapper(void)
1905{
1906 boolean_t need_wakeup = FALSE;
1907
39037602 1908 if (c_segment_count == 0)
39236c6e 1909 return;
fe8ab488 1910
39037602 1911 if (compaction_swapper_running || compaction_swapper_awakened)
3e170ce0
A
1912 return;
1913
fe8ab488
A
1914 if (!compaction_swapper_inited && !compaction_swapper_init_now) {
1915 compaction_swapper_init_now = 1;
1916 need_wakeup = TRUE;
1917 }
39236c6e
A
1918
1919 if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) {
1920
1921 need_wakeup = TRUE;
1922
1923 } else if (compressor_needs_to_swap()) {
1924
1925 need_wakeup = TRUE;
1926
1927 } else if (c_minor_count) {
1928 uint64_t total_bytes;
1929
1930 total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64;
1931
1932 if ((total_bytes - compressor_bytes_used) > total_bytes / 10)
1933 need_wakeup = TRUE;
1934 }
1935 if (need_wakeup == TRUE) {
1936
1937 lck_mtx_lock_spin_always(c_list_lock);
1938
1939 fastwake_warmup = FALSE;
1940
39037602 1941 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
39236c6e
A
1942 memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE);
1943
39037602 1944 compaction_swapper_awakened = 1;
39236c6e 1945 thread_wakeup((event_t)&c_compressor_swap_trigger);
39236c6e
A
1946 }
1947 lck_mtx_unlock_always(c_list_lock);
1948 }
1949}
1950
1951
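/*
 * Descriptive note (not in the original source): C_SWAPOUT_LIMIT bounds how
 * many segments may sit on the swapout queue before the compactor blocks and
 * lets the swapout thread drain it, and DELAYED_COMPACTIONS_PER_PASS bounds
 * how many minor compactions are performed below before re-checking whether
 * a swapout is needed.
 */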
1952#define C_SWAPOUT_LIMIT 4
1953#define DELAYED_COMPACTIONS_PER_PASS 30
1954
1955void
1956vm_compressor_do_delayed_compactions(boolean_t flush_all)
1957{
1958 c_segment_t c_seg;
1959 int number_compacted = 0;
1960 boolean_t needs_to_swap = FALSE;
1961
1962
39037602 1963 LCK_MTX_ASSERT(c_list_lock, LCK_MTX_ASSERT_OWNED);
39236c6e
A
1964
1965 while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) {
1966
1967 c_seg = (c_segment_t)queue_first(&c_minor_list_head);
1968
1969 lck_mtx_lock_spin_always(&c_seg->c_lock);
8a3053a0 1970
fe8ab488 1971 if (c_seg->c_busy) {
8a3053a0 1972
fe8ab488
A
1973 lck_mtx_unlock_always(c_list_lock);
1974 c_seg_wait_on_busy(c_seg);
1975 lck_mtx_lock_spin_always(c_list_lock);
8a3053a0 1976
fe8ab488
A
1977 continue;
1978 }
1979 C_SEG_BUSY(c_seg);
39236c6e
A
1980
1981 c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE);
1982
39037602 1983 if (VM_CONFIG_SWAP_IS_ACTIVE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) {
39236c6e
A
1984
1985 if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT)
1986 needs_to_swap = TRUE;
1987
1988 number_compacted = 0;
1989 }
1990 lck_mtx_lock_spin_always(c_list_lock);
1991 }
1992}
1993
1994
1995#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10
1996
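/*
 * Descriptive note (not in the original source): segments that have been
 * swapped back in are parked on the swapped-in queue and only returned to
 * the age queue (below) once they have been resident for at least
 * C_SEGMENT_SWAPPEDIN_AGE_LIMIT seconds, so a freshly swapped-in segment
 * is not immediately re-selected for swapout.
 */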
1997static void
1998vm_compressor_age_swapped_in_segments(boolean_t flush_all)
1999{
2000 c_segment_t c_seg;
2001 clock_sec_t now;
2002 clock_nsec_t nsec;
2003
2004 clock_get_system_nanotime(&now, &nsec);
2005
2006 while (!queue_empty(&c_swappedin_list_head)) {
2007
2008 c_seg = (c_segment_t)queue_first(&c_swappedin_list_head);
2009
2010 if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT)
2011 break;
2012
2013 lck_mtx_lock_spin_always(&c_seg->c_lock);
2014
3e170ce0 2015 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
39236c6e
A
2016
2017 lck_mtx_unlock_always(&c_seg->c_lock);
2018 }
2019}
2020
2021
39037602
A
2022extern int vm_num_swap_files;
2023extern int vm_num_pinned_swap_files;
2024extern int vm_swappin_enabled;
2025
2026extern unsigned int vm_swapfile_total_segs_used;
2027extern unsigned int vm_swapfile_total_segs_alloced;
2028
2029
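/*
 * vm_compressor_flush: descriptive note (not in the original source).
 * Used on the hibernation path to push compressed segments out to swap
 * before the memory image is written; it waits for any in-flight compaction
 * pass to finish, sets a generation-id barrier, and then runs
 * vm_compressor_compact_and_swap(TRUE) until the swapout queue drains or the
 * flush deadline / swap-space checks abort it.
 */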
39236c6e
A
2030void
2031vm_compressor_flush(void)
2032{
2033 uint64_t vm_swap_put_failures_at_start;
2034 wait_result_t wait_result = 0;
2035 AbsoluteTime startTime, endTime;
2036 clock_sec_t now_sec;
2037 clock_nsec_t now_nsec;
2038 uint64_t nsec;
2039
2040 HIBLOG("vm_compressor_flush - starting\n");
2041
2042 clock_get_uptime(&startTime);
2043
2044 lck_mtx_lock_spin_always(c_list_lock);
2045
2046 fastwake_warmup = FALSE;
2047 compaction_swapper_abort = 1;
2048
2049 while (compaction_swapper_running) {
2050 assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT);
2051
2052 lck_mtx_unlock_always(c_list_lock);
2053
2054 thread_block(THREAD_CONTINUE_NULL);
2055
2056 lck_mtx_lock_spin_always(c_list_lock);
2057 }
2058 compaction_swapper_abort = 0;
2059 compaction_swapper_running = 1;
2060
2061 hibernate_flushing = TRUE;
2062 hibernate_no_swapspace = FALSE;
2063 c_generation_id_flush_barrier = c_generation_id + 1000;
2064
2065 clock_get_system_nanotime(&now_sec, &now_nsec);
2066 hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE;
2067
2068 vm_swap_put_failures_at_start = vm_swap_put_failures;
2069
2070 vm_compressor_compact_and_swap(TRUE);
2071
2072 while (!queue_empty(&c_swapout_list_head)) {
2073
2074 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
2075
2076 lck_mtx_unlock_always(c_list_lock);
2077
2078 wait_result = thread_block(THREAD_CONTINUE_NULL);
2079
2080 lck_mtx_lock_spin_always(c_list_lock);
2081
2082 if (wait_result == THREAD_TIMED_OUT)
2083 break;
2084 }
2085 hibernate_flushing = FALSE;
2086 compaction_swapper_running = 0;
2087
2088 if (vm_swap_put_failures > vm_swap_put_failures_at_start)
2089 HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n",
2090 vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT);
2091
2092 lck_mtx_unlock_always(c_list_lock);
2093
39037602
A
2094 thread_wakeup((event_t)&compaction_swapper_running);
2095
39236c6e
A
2096 clock_get_uptime(&endTime);
2097 SUB_ABSOLUTETIME(&endTime, &startTime);
2098 absolutetime_to_nanoseconds(endTime, &nsec);
2099
39037602
A
2100 HIBLOG("vm_compressor_flush completed - took %qd msecs - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d, vm_swappin_enabled = %d\n",
2101 nsec / 1000000ULL, vm_num_swap_files, vm_num_pinned_swap_files, vm_swappin_enabled);
39236c6e
A
2102}
2103
2104
39236c6e
A
2105int compaction_swap_trigger_thread_awakened = 0;
2106
2107static void
2108vm_compressor_swap_trigger_thread(void)
2109{
3e170ce0
A
2110 current_thread()->options |= TH_OPT_VMPRIV;
2111
fe8ab488
A
2112 /*
2113 * compaction_swapper_init_now is set when the first call to
2114 * vm_consider_waking_compactor_swapper is made from
2115 * vm_pageout_scan... since this function is called upon
2116 * thread creation, we want to make sure to delay adjusting
2117 * the tuneables until we are awakened via vm_pageout_scan
2118 * so that we are at a point where the vm_swapfile_open will
2119 * be operating on the correct directory (in case the default
2120 * of /var/vm/ is overridden by the dynamic_pager)
2121 */
39037602
A
2122 if (compaction_swapper_init_now) {
2123 vm_compaction_swapper_do_init();
39236c6e 2124
3e170ce0
A
2125 if (vm_restricted_to_single_processor == TRUE)
2126 thread_vm_bind_group_add();
2127
39037602 2128 compaction_swapper_init_now = 0;
fe8ab488 2129 }
39236c6e
A
2130 lck_mtx_lock_spin_always(c_list_lock);
2131
2132 compaction_swap_trigger_thread_awakened++;
39037602 2133 compaction_swapper_awakened = 0;
39236c6e 2134
39037602 2135 if (compaction_swapper_running == 0) {
39236c6e 2136
39037602
A
2137 compaction_swapper_running = 1;
2138
2139 vm_compressor_compact_and_swap(FALSE);
2140
2141 compaction_swapper_running = 0;
2142 }
39236c6e
A
2143 assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT);
2144
39037602
A
2145 if (compaction_swapper_running == 0)
2146 thread_wakeup((event_t)&compaction_swapper_running);
39236c6e
A
2147
2148 lck_mtx_unlock_always(c_list_lock);
2149
2150 thread_block((thread_continue_t)vm_compressor_swap_trigger_thread);
2151
2152 /* NOTREACHED */
2153}
2154
2155
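/*
 * Fastwake warmup (descriptive note, not in the original source): the two
 * record_warmup calls below bracket a window and remember the generation-id
 * range of the segments created during it; after wake, do_fastwake_warmup()
 * walks the swapped-out queues and swaps segments in that range back in,
 * throttled at the compressor TIER2 I/O level, so their contents are resident
 * again before the first decompressions hit the swap file one page at a time.
 */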
2156void
2157vm_compressor_record_warmup_start(void)
2158{
2159 c_segment_t c_seg;
2160
2161 lck_mtx_lock_spin_always(c_list_lock);
2162
8a3053a0
A
2163 if (first_c_segment_to_warm_generation_id == 0) {
2164 if (!queue_empty(&c_age_list_head)) {
39236c6e 2165
8a3053a0 2166 c_seg = (c_segment_t)queue_last(&c_age_list_head);
39236c6e 2167
8a3053a0
A
2168 first_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2169 } else
2170 first_c_segment_to_warm_generation_id = 0;
39236c6e 2171
8a3053a0
A
2172 fastwake_recording_in_progress = TRUE;
2173 }
39236c6e
A
2174 lck_mtx_unlock_always(c_list_lock);
2175}
2176
2177
2178void
2179vm_compressor_record_warmup_end(void)
2180{
2181 c_segment_t c_seg;
2182
2183 lck_mtx_lock_spin_always(c_list_lock);
2184
8a3053a0 2185 if (fastwake_recording_in_progress == TRUE) {
39236c6e 2186
8a3053a0 2187 if (!queue_empty(&c_age_list_head)) {
39236c6e 2188
8a3053a0
A
2189 c_seg = (c_segment_t)queue_last(&c_age_list_head);
2190
2191 last_c_segment_to_warm_generation_id = c_seg->c_generation_id;
2192 } else
2193 last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id;
39236c6e 2194
8a3053a0 2195 fastwake_recording_in_progress = FALSE;
39236c6e 2196
8a3053a0
A
2197 HIBLOG("vm_compressor_record_warmup (%qd - %qd)\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2198 }
39236c6e
A
2199 lck_mtx_unlock_always(c_list_lock);
2200}
2201
2202
39037602 2203#define DELAY_TRIM_ON_WAKE_SECS 25
39236c6e
A
2204
2205void
8a3053a0 2206vm_compressor_delay_trim(void)
39236c6e 2207{
8a3053a0 2208 clock_sec_t sec;
39236c6e
A
2209 clock_nsec_t nsec;
2210
2211 clock_get_system_nanotime(&sec, &nsec);
2212 dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS;
8a3053a0 2213}
39236c6e 2214
39236c6e 2215
8a3053a0
A
2216void
2217vm_compressor_do_warmup(void)
2218{
39236c6e
A
2219 lck_mtx_lock_spin_always(c_list_lock);
2220
8a3053a0
A
2221 if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) {
2222 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2223
2224 lck_mtx_unlock_always(c_list_lock);
2225 return;
2226 }
2227
39037602 2228 if (compaction_swapper_running == 0 && compaction_swapper_awakened == 0) {
39236c6e
A
2229
2230 fastwake_warmup = TRUE;
39037602
A
2231
2232 compaction_swapper_awakened = 1;
39236c6e
A
2233 thread_wakeup((event_t)&c_compressor_swap_trigger);
2234 }
2235 lck_mtx_unlock_always(c_list_lock);
2236}
2237
d190cdc3
A
2238void
2239do_fastwake_warmup_all(void)
2240{
2241
2242 lck_mtx_lock_spin_always(c_list_lock);
2243
2244 if (queue_empty(&c_swappedout_list_head) && queue_empty(&c_swappedout_sparse_list_head)) {
2245
2246 lck_mtx_unlock_always(c_list_lock);
2247 return;
2248 }
2249
2250 fastwake_warmup = TRUE;
2251
2252 do_fastwake_warmup(&c_swappedout_list_head, TRUE);
2253
2254 do_fastwake_warmup(&c_swappedout_sparse_list_head, TRUE);
2255
2256 fastwake_warmup = FALSE;
2257
2258 lck_mtx_unlock_always(c_list_lock);
2259
2260}
39236c6e
A
2261
2262void
d190cdc3 2263do_fastwake_warmup(queue_head_t *c_queue, boolean_t consider_all_cseg)
39236c6e 2264{
39236c6e 2265 c_segment_t c_seg = NULL;
8a3053a0
A
2266 AbsoluteTime startTime, endTime;
2267 uint64_t nsec;
2268
2269
2270 HIBLOG("vm_compressor_fastwake_warmup (%qd - %qd) - starting\n", first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id);
2271
2272 clock_get_uptime(&startTime);
39236c6e
A
2273
2274 lck_mtx_unlock_always(c_list_lock);
2275
39037602
A
2276 proc_set_thread_policy(current_thread(),
2277 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
39236c6e
A
2278
2279 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2280
2281 lck_mtx_lock_spin_always(c_list_lock);
2282
d190cdc3 2283 while (!queue_empty(c_queue) && fastwake_warmup == TRUE) {
39236c6e 2284
d190cdc3 2285 c_seg = (c_segment_t) queue_first(c_queue);
39236c6e 2286
d190cdc3
A
2287 if (consider_all_cseg == FALSE) {
2288 if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id ||
2289 c_seg->c_generation_id > last_c_segment_to_warm_generation_id)
2290 break;
39236c6e 2291
d190cdc3
A
2292 if (vm_page_free_count < (AVAILABLE_MEMORY / 4))
2293 break;
2294 }
4bd07ac2 2295
39236c6e
A
2296 lck_mtx_lock_spin_always(&c_seg->c_lock);
2297 lck_mtx_unlock_always(c_list_lock);
2298
8a3053a0
A
2299 if (c_seg->c_busy) {
2300 PAGE_REPLACEMENT_DISALLOWED(FALSE);
39236c6e 2301 c_seg_wait_on_busy(c_seg);
8a3053a0
A
2302 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2303 } else {
39037602
A
2304 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
2305 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e 2306 c_segment_warmup_count++;
8a3053a0
A
2307
2308 PAGE_REPLACEMENT_DISALLOWED(FALSE);
39236c6e 2309 vm_pageout_io_throttle();
8a3053a0 2310 PAGE_REPLACEMENT_DISALLOWED(TRUE);
39236c6e
A
2311 }
2312 lck_mtx_lock_spin_always(c_list_lock);
2313 }
2314 lck_mtx_unlock_always(c_list_lock);
2315
2316 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2317
39037602
A
2318 proc_set_thread_policy(current_thread(),
2319 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0);
39236c6e 2320
8a3053a0
A
2321 clock_get_uptime(&endTime);
2322 SUB_ABSOLUTETIME(&endTime, &startTime);
2323 absolutetime_to_nanoseconds(endTime, &nsec);
2324
2325 HIBLOG("vm_compressor_fastwake_warmup completed - took %qd msecs\n", nsec / 1000000ULL);
2326
39236c6e 2327 lck_mtx_lock_spin_always(c_list_lock);
8a3053a0 2328
d190cdc3
A
2329 if (consider_all_cseg == FALSE) {
2330 first_c_segment_to_warm_generation_id = last_c_segment_to_warm_generation_id = 0;
2331 }
39236c6e
A
2332}
2333
2334
2335void
2336vm_compressor_compact_and_swap(boolean_t flush_all)
2337{
2338 c_segment_t c_seg, c_seg_next;
2339 boolean_t keep_compacting;
3e170ce0
A
2340 clock_sec_t now;
2341 clock_nsec_t nsec;
39236c6e
A
2342
2343
2344 if (fastwake_warmup == TRUE) {
2345 uint64_t starting_warmup_count;
2346
2347 starting_warmup_count = c_segment_warmup_count;
2348
2349 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count,
2350 first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0);
d190cdc3 2351 do_fastwake_warmup(&c_swappedout_list_head, FALSE);
39236c6e
A
2352 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0);
2353
2354 fastwake_warmup = FALSE;
2355 }
2356
8a3053a0
A
2357 /*
2358 * it's possible for the c_age_list_head to be empty if we
2359 * hit our limits for growing the compressor pool and we subsequently
2360 * hibernated... on the next hibernation we could see the queue as
2361 * empty and not proceed even though we have a bunch of segments on
2362 * the swapped in queue that need to be dealt with.
2363 */
2364 vm_compressor_do_delayed_compactions(flush_all);
2365
2366 vm_compressor_age_swapped_in_segments(flush_all);
2367
3e170ce0
A
2368 /*
2369 * we only need to grab the timestamp once per
2370 * invocation of this function since the
2371 * timescale we're interested in is measured
2372 * in days
2373 */
2374 clock_get_system_nanotime(&now, &nsec);
8a3053a0 2375
39236c6e
A
2376 while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) {
2377
2378 if (hibernate_flushing == TRUE) {
2379 clock_sec_t sec;
39236c6e
A
2380
2381 if (hibernate_should_abort()) {
2382 HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n");
2383 break;
2384 }
2385 if (hibernate_no_swapspace == TRUE) {
2386 HIBLOG("vm_compressor_flush - out of swap space\n");
2387 break;
2388 }
39037602
A
2389 if (vm_swap_files_pinned() == FALSE) {
2390 HIBLOG("vm_compressor_flush - unpinned swap files\n");
2391 break;
2392 }
2393 if (hibernate_in_progress_with_pinned_swap == TRUE &&
2394 (vm_swapfile_total_segs_alloced == vm_swapfile_total_segs_used)) {
2395 HIBLOG("vm_compressor_flush - out of pinned swap space\n");
2396 break;
2397 }
39236c6e
A
2398 clock_get_system_nanotime(&sec, &nsec);
2399
2400 if (sec > hibernate_flushing_deadline) {
2401 HIBLOG("vm_compressor_flush - failed to finish before deadline\n");
2402 break;
2403 }
2404 }
2405 if (c_swapout_count >= C_SWAPOUT_LIMIT) {
2406
2407 assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000*NSEC_PER_USEC);
2408
2409 lck_mtx_unlock_always(c_list_lock);
2410
2411 thread_block(THREAD_CONTINUE_NULL);
2412
2413 lck_mtx_lock_spin_always(c_list_lock);
2414 }
2415 /*
2416 * Minor compactions
2417 */
2418 vm_compressor_do_delayed_compactions(flush_all);
2419
2420 vm_compressor_age_swapped_in_segments(flush_all);
2421
2422 if (c_swapout_count >= C_SWAPOUT_LIMIT) {
2423 /*
2424 * we timed out on the above thread_block
2425 * let's loop around and try again
2426 * the timeout allows us to continue
2427 * to do minor compactions to make
2428 * more memory available
2429 */
2430 continue;
2431 }
2432
2433 /*
2434 * Swap out segments?
2435 */
2436 if (flush_all == FALSE) {
2437 boolean_t needs_to_swap;
2438
2439 lck_mtx_unlock_always(c_list_lock);
2440
2441 needs_to_swap = compressor_needs_to_swap();
2442
3e170ce0
A
2443 if (needs_to_swap == TRUE && vm_swap_low_on_space())
2444 vm_compressor_take_paging_space_action();
2445
39236c6e
A
2446 lck_mtx_lock_spin_always(c_list_lock);
2447
2448 if (needs_to_swap == FALSE)
2449 break;
2450 }
2451 if (queue_empty(&c_age_list_head))
2452 break;
2453 c_seg = (c_segment_t) queue_first(&c_age_list_head);
2454
3e170ce0 2455 assert(c_seg->c_state == C_ON_AGE_Q);
39236c6e 2456
3e170ce0 2457 if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier)
39236c6e 2458 break;
3e170ce0 2459
39236c6e
A
2460 lck_mtx_lock_spin_always(&c_seg->c_lock);
2461
2462 if (c_seg->c_busy) {
2463
2464 lck_mtx_unlock_always(c_list_lock);
2465 c_seg_wait_on_busy(c_seg);
2466 lck_mtx_lock_spin_always(c_list_lock);
2467
2468 continue;
2469 }
fe8ab488 2470 C_SEG_BUSY(c_seg);
39236c6e
A
2471
2472 if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) {
2473 /*
2474 * found an empty c_segment and freed it
2475 * so go grab the next guy in the queue
2476 */
3e170ce0 2477 c_seg_major_compact_stats.count_of_freed_segs++;
39236c6e
A
2478 continue;
2479 }
2480 /*
2481 * Major compaction
2482 */
2483 keep_compacting = TRUE;
2484
2485 while (keep_compacting == TRUE) {
2486
2487 assert(c_seg->c_busy);
2488
2489 /* look for another segment to consolidate */
2490
2491 c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list);
2492
2493 if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next))
2494 break;
2495
3e170ce0
A
2496 assert(c_seg_next->c_state == C_ON_AGE_Q);
2497
39236c6e
A
2498 if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE)
2499 break;
2500
2501 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
2502
2503 if (c_seg_next->c_busy) {
2504
2505 lck_mtx_unlock_always(c_list_lock);
2506 c_seg_wait_on_busy(c_seg_next);
2507 lck_mtx_lock_spin_always(c_list_lock);
2508
2509 continue;
2510 }
2511 /* grab that segment */
fe8ab488 2512 C_SEG_BUSY(c_seg_next);
39236c6e
A
2513
2514 if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) {
2515 /*
2516 * found an empty c_segment and freed it
2517 * so we can't continue to use c_seg_next
2518 */
3e170ce0 2519 c_seg_major_compact_stats.count_of_freed_segs++;
39236c6e
A
2520 continue;
2521 }
2522
2523 /* unlock the list ... */
2524 lck_mtx_unlock_always(c_list_lock);
2525
2526 /* do the major compaction */
2527
2528 keep_compacting = c_seg_major_compact(c_seg, c_seg_next);
2529
2530 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2531
2532 lck_mtx_lock_spin_always(&c_seg_next->c_lock);
2533 /*
2534 * run a minor compaction on the donor segment
2535 * since we pulled at least some of its
2536 * data into our target... if we've emptied
2537 * it, now is a good time to free it which
2538 * c_seg_minor_compaction_and_unlock also takes care of
2539 *
2540 * by passing TRUE, we ask for c_busy to be cleared
2541 * and c_wanted to be taken care of
2542 */
3e170ce0
A
2543 if (c_seg_minor_compaction_and_unlock(c_seg_next, TRUE))
2544 c_seg_major_compact_stats.count_of_freed_segs++;
39236c6e
A
2545
2546 PAGE_REPLACEMENT_DISALLOWED(FALSE);
2547
2548 /* relock the list */
2549 lck_mtx_lock_spin_always(c_list_lock);
2550
2551 } /* major compaction */
2552
39236c6e
A
2553 lck_mtx_lock_spin_always(&c_seg->c_lock);
2554
2555 assert(c_seg->c_busy);
39236c6e
A
2556 assert(!c_seg->c_on_minorcompact_q);
2557
39037602 2558 if (VM_CONFIG_SWAP_IS_ACTIVE) {
3e170ce0
A
2559 /*
2560 * This mode of putting a generic c_seg on the swapout list is
39037602 2561 * only supported when we have general swapping enabled
3e170ce0 2562 */
39037602
A
2563 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
2564 } else {
2565 if ((vm_swapout_ripe_segments == TRUE && c_overage_swapped_count < c_overage_swapped_limit)) {
2566
2567 assert(VM_CONFIG_SWAP_IS_PRESENT);
2568 /*
2569 * we are running compressor sweeps with swap-behind
2570 * make sure the c_seg has aged enough before swapping it
2571 * out...
2572 */
2573 if ((now - c_seg->c_creation_ts) >= vm_ripe_target_age) {
2574 c_seg->c_overage_swap = TRUE;
2575 c_overage_swapped_count++;
2576 c_seg_switch_state(c_seg, C_ON_SWAPOUT_Q, FALSE);
3e170ce0
A
2577 }
2578 }
2579 }
2580 if (c_seg->c_state == C_ON_AGE_Q) {
2581 /*
2582 * this c_seg didn't get moved to the swapout queue
2583 * so we need to move it out of the way...
2584 * we just did a major compaction on it so put it
2585 * on that queue
2586 */
2587 c_seg_switch_state(c_seg, C_ON_MAJORCOMPACT_Q, FALSE);
39236c6e 2588 } else {
3e170ce0
A
2589 c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used;
2590 c_seg_major_compact_stats.count_of_swapouts++;
39236c6e
A
2591 }
2592 C_SEG_WAKEUP_DONE(c_seg);
2593
2594 lck_mtx_unlock_always(&c_seg->c_lock);
2595
2596 if (c_swapout_count) {
2597 lck_mtx_unlock_always(c_list_lock);
2598
2599 thread_wakeup((event_t)&c_swapout_list_head);
2600
2601 lck_mtx_lock_spin_always(c_list_lock);
2602 }
2603 }
2604}
2605
2606
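/*
 * c_seg_allocate: descriptive note (not in the original source).
 * Returns the caller's current filling segment, creating a new one if needed.
 * Free entries in the c_segments[] array are chained through their c_segno
 * field, with c_free_segno_head pointing at the first free index; once an
 * entry is allocated it holds the c_seg pointer instead.  New index pages are
 * populated C_SEGMENTS_PER_PAGE entries at a time under c_segments_busy.
 */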
2607static c_segment_t
2608c_seg_allocate(c_segment_t *current_chead)
2609{
39236c6e 2610 c_segment_t c_seg;
3e170ce0
A
2611 int min_needed;
2612 int size_to_populate;
2613
2614 if (vm_compressor_low_on_space())
2615 vm_compressor_take_paging_space_action();
39236c6e
A
2616
2617 if ( (c_seg = *current_chead) == NULL ) {
2618 uint32_t c_segno;
2619
39236c6e
A
2620 lck_mtx_lock_spin_always(c_list_lock);
2621
2622 while (c_segments_busy == TRUE) {
2623 assert_wait((event_t) (&c_segments_busy), THREAD_UNINT);
2624
2625 lck_mtx_unlock_always(c_list_lock);
2626
2627 thread_block(THREAD_CONTINUE_NULL);
2628
2629 lck_mtx_lock_spin_always(c_list_lock);
2630 }
2631 if (c_free_segno_head == (uint32_t)-1) {
3e170ce0 2632 uint32_t c_segments_available_new;
39236c6e
A
2633
2634 if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) {
2635 lck_mtx_unlock_always(c_list_lock);
2636
39236c6e
A
2637 return (NULL);
2638 }
2639 c_segments_busy = TRUE;
2640 lck_mtx_unlock_always(c_list_lock);
2641
39037602 2642 kernel_memory_populate(compressor_map, (vm_offset_t)c_segments_next_page,
3e170ce0 2643 PAGE_SIZE, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR);
39236c6e
A
2644 c_segments_next_page += PAGE_SIZE;
2645
3e170ce0
A
2646 c_segments_available_new = c_segments_available + C_SEGMENTS_PER_PAGE;
2647
2648 if (c_segments_available_new > c_segments_limit)
2649 c_segments_available_new = c_segments_limit;
2650
2651 for (c_segno = c_segments_available + 1; c_segno < c_segments_available_new; c_segno++)
39236c6e
A
2652 c_segments[c_segno - 1].c_segno = c_segno;
2653
2654 lck_mtx_lock_spin_always(c_list_lock);
2655
2656 c_segments[c_segno - 1].c_segno = c_free_segno_head;
2657 c_free_segno_head = c_segments_available;
3e170ce0 2658 c_segments_available = c_segments_available_new;
39236c6e
A
2659
2660 c_segments_busy = FALSE;
2661 thread_wakeup((event_t) (&c_segments_busy));
2662 }
2663 c_segno = c_free_segno_head;
3e170ce0
A
2664 assert(c_segno >= 0 && c_segno < c_segments_limit);
2665
39037602 2666 c_free_segno_head = (uint32_t)c_segments[c_segno].c_segno;
39236c6e 2667
3e170ce0
A
2668 /*
2669 * do the rest of the bookkeeping now while we're still behind
2670 * the list lock... note that the generation id itself is assigned
2671 * later, in c_current_seg_filled, once the segment has been filled
2672 */
2673 c_segment_count++;
2674 if (c_segment_count > c_segment_count_max)
2675 c_segment_count_max = c_segment_count;
2676
39236c6e
A
2677 lck_mtx_unlock_always(c_list_lock);
2678
2679 c_seg = (c_segment_t)zalloc(compressor_segment_zone);
2680 bzero((char *)c_seg, sizeof(struct c_segment));
2681
3e170ce0 2682 c_seg->c_store.c_buffer = (int32_t *)C_SEG_BUFFER_ADDRESS(c_segno);
39236c6e 2683
39236c6e 2684 lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr);
39236c6e 2685
3e170ce0
A
2686 c_seg->c_state = C_IS_EMPTY;
2687 c_seg->c_firstemptyslot = C_SLOT_MAX_INDEX;
39236c6e 2688 c_seg->c_mysegno = c_segno;
39236c6e
A
2689
2690 lck_mtx_lock_spin_always(c_list_lock);
3e170ce0
A
2691 c_empty_count++;
2692 c_seg_switch_state(c_seg, C_IS_FILLING, FALSE);
39236c6e 2693 c_segments[c_segno].c_seg = c_seg;
39037602 2694 assert(c_segments[c_segno].c_segno > c_segments_available);
39236c6e
A
2695 lck_mtx_unlock_always(c_list_lock);
2696
39236c6e 2697 *current_chead = c_seg;
39037602
A
2698
2699#if DEVELOPMENT || DEBUG
2700 C_SEG_MAKE_WRITEABLE(c_seg);
2701#endif
2702
39236c6e 2703 }
3e170ce0
A
2704 c_seg_alloc_nextslot(c_seg);
2705
2706 size_to_populate = C_SEG_ALLOCSIZE - C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset);
2707
2708 if (size_to_populate) {
2709
2710 min_needed = PAGE_SIZE + (C_SEG_ALLOCSIZE - C_SEG_BUFSIZE);
39236c6e 2711
3e170ce0 2712 if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset) < (unsigned) min_needed) {
39236c6e 2713
3e170ce0
A
2714 if (size_to_populate > C_SEG_MAX_POPULATE_SIZE)
2715 size_to_populate = C_SEG_MAX_POPULATE_SIZE;
39236c6e 2716
39037602 2717 kernel_memory_populate(compressor_map,
3e170ce0
A
2718 (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset],
2719 size_to_populate,
2720 KMA_COMPRESSOR,
2721 VM_KERN_MEMORY_COMPRESSOR);
2722 } else
2723 size_to_populate = 0;
39236c6e 2724 }
39236c6e
A
2725 PAGE_REPLACEMENT_DISALLOWED(TRUE);
2726
2727 lck_mtx_lock_spin_always(&c_seg->c_lock);
2728
3e170ce0
A
2729 if (size_to_populate)
2730 c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(size_to_populate);
2731
39236c6e
A
2732 return (c_seg);
2733}
2734
2735
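/*
 * c_current_seg_filled: descriptive note (not in the original source).
 * Called when the current filling segment can't take another slot: any
 * populated-but-unused pages at the tail of the buffer are handed back via
 * kernel_memory_depopulate(), the segment is timestamped, given its
 * generation id and moved to the age queue (or straight to the swapout queue
 * on the freezer path), and *current_chead is cleared so the next
 * compression starts a fresh segment.
 */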
39236c6e
A
2736static void
2737c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead)
2738{
2739 uint32_t unused_bytes;
2740 uint32_t offset_to_depopulate;
3e170ce0
A
2741 int new_state = C_ON_AGE_Q;
2742 clock_sec_t sec;
2743 clock_nsec_t nsec;
39236c6e
A
2744
2745 unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset));
2746
2747 if (unused_bytes) {
2748
2749 offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset)));
2750
2751 /*
2752 * release the extra physical page(s) at the end of the segment
2753 */
2754 lck_mtx_unlock_always(&c_seg->c_lock);
2755
2756 kernel_memory_depopulate(
39037602 2757 compressor_map,
39236c6e
A
2758 (vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate],
2759 unused_bytes,
2760 KMA_COMPRESSOR);
2761
2762 lck_mtx_lock_spin_always(&c_seg->c_lock);
2763
2764 c_seg->c_populated_offset = offset_to_depopulate;
2765 }
3e170ce0 2766 assert(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) <= C_SEG_BUFSIZE);
39236c6e 2767
39037602
A
2768#if DEVELOPMENT || DEBUG
2769 {
2770 boolean_t c_seg_was_busy = FALSE;
2771
2772 if ( !c_seg->c_busy)
2773 C_SEG_BUSY(c_seg);
2774 else
2775 c_seg_was_busy = TRUE;
2776
2777 lck_mtx_unlock_always(&c_seg->c_lock);
2778
2779 C_SEG_WRITE_PROTECT(c_seg);
2780
2781 lck_mtx_lock_spin_always(&c_seg->c_lock);
2782
2783 if (c_seg_was_busy == FALSE)
2784 C_SEG_WAKEUP_DONE(c_seg);
2785 }
2786#endif
2787
3e170ce0 2788#if CONFIG_FREEZE
39037602
A
2789 if (current_chead == (c_segment_t*)&freezer_chead &&
2790 VM_CONFIG_SWAP_IS_PRESENT &&
2791 VM_CONFIG_FREEZER_SWAP_IS_ACTIVE &&
3e170ce0
A
2792 c_freezer_swapout_count < VM_MAX_FREEZER_CSEG_SWAP_COUNT) {
2793 new_state = C_ON_SWAPOUT_Q;
2794 }
2795#endif /* CONFIG_FREEZE */
39236c6e 2796
3e170ce0
A
2797 clock_get_system_nanotime(&sec, &nsec);
2798 c_seg->c_creation_ts = (uint32_t)sec;
2799
2800 lck_mtx_lock_spin_always(c_list_lock);
2801
2802#if CONFIG_FREEZE
2803 if (c_seg->c_state == C_ON_SWAPOUT_Q)
2804 c_freezer_swapout_count++;
2805#endif /* CONFIG_FREEZE */
2806
2807 c_seg->c_generation_id = c_generation_id++;
2808 c_seg_switch_state(c_seg, new_state, FALSE);
2809
39037602
A
2810 if (c_seg->c_state == C_ON_AGE_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
2811 c_seg_need_delayed_compaction(c_seg, TRUE);
2812
3e170ce0
A
2813 lck_mtx_unlock_always(c_list_lock);
2814
2815#if CONFIG_FREEZE
2816 if (c_seg->c_state == C_ON_SWAPOUT_Q)
2817 thread_wakeup((event_t)&c_swapout_list_head);
2818#endif /* CONFIG_FREEZE */
2819
39236c6e
A
2820 *current_chead = NULL;
2821}
2822
39037602 2823
39236c6e
A
2824/*
2825 * returns with c_seg locked
2826 */
2827void
39037602 2828c_seg_swapin_requeue(c_segment_t c_seg, boolean_t has_data, boolean_t minor_compact_ok, boolean_t age_on_swapin_q)
39236c6e
A
2829{
2830 clock_sec_t sec;
2831 clock_nsec_t nsec;
2832
2833 clock_get_system_nanotime(&sec, &nsec);
2834
2835 lck_mtx_lock_spin_always(c_list_lock);
2836 lck_mtx_lock_spin_always(&c_seg->c_lock);
2837
39037602
A
2838 assert(c_seg->c_busy_swapping);
2839 assert(c_seg->c_busy);
2840
3e170ce0
A
2841 c_seg->c_busy_swapping = 0;
2842
2843 if (c_seg->c_overage_swap == TRUE) {
2844 c_overage_swapped_count--;
2845 c_seg->c_overage_swap = FALSE;
2846 }
2847 if (has_data == TRUE) {
39037602
A
2848 if (age_on_swapin_q == TRUE)
2849 c_seg_switch_state(c_seg, C_ON_SWAPPEDIN_Q, FALSE);
2850 else
2851 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
2852
2853 if (minor_compact_ok == TRUE && !c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
2854 c_seg_need_delayed_compaction(c_seg, TRUE);
39236c6e 2855 } else {
3e170ce0
A
2856 c_seg->c_store.c_buffer = (int32_t*) NULL;
2857 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
39236c6e 2858
3e170ce0 2859 c_seg_switch_state(c_seg, C_ON_BAD_Q, FALSE);
39236c6e 2860 }
39236c6e 2861 c_seg->c_swappedin_ts = (uint32_t)sec;
39236c6e
A
2862
2863 lck_mtx_unlock_always(c_list_lock);
2864}
2865
2866
2867
2868/*
39037602 2869 * c_seg has to be locked and is returned locked if the c_seg isn't freed
39236c6e 2870 * PAGE_REPLACEMENT_DISALLOWED has to be TRUE on entry and is returned TRUE
39037602 2871 * c_seg_swapin returns 1 if the c_seg was freed, 0 otherwise
39236c6e
A
2872 */
2873
39037602
A
2874int
2875c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction, boolean_t age_on_swapin_q)
39236c6e
A
2876{
2877 vm_offset_t addr = 0;
2878 uint32_t io_size = 0;
2879 uint64_t f_offset;
2880
3e170ce0
A
2881 assert(C_SEG_IS_ONDISK(c_seg));
2882
39236c6e 2883#if !CHECKSUM_THE_SWAP
3e170ce0 2884 c_seg_trim_tail(c_seg);
39236c6e
A
2885#endif
2886 io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2887 f_offset = c_seg->c_store.c_swap_handle;
fe8ab488
A
2888
2889 C_SEG_BUSY(c_seg);
3e170ce0 2890 c_seg->c_busy_swapping = 1;
ecc0ceb4
A
2891
2892 /*
2893 * This thread is likely going to block for I/O.
2894 * Make sure it is ready to run when the I/O completes because
2895 * it needs to clear the busy bit on the c_seg so that other
2896 * waiting threads can make progress too. To do that, boost
2897 * the rwlock_count so that the priority is boosted.
2898 */
2899 set_thread_rwlock_boost();
39236c6e 2900 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e 2901
3e170ce0 2902 PAGE_REPLACEMENT_DISALLOWED(FALSE);
39236c6e 2903
3e170ce0 2904 addr = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
39037602 2905 c_seg->c_store.c_buffer = (int32_t*) addr;
39236c6e 2906
39037602 2907 kernel_memory_populate(compressor_map, addr, io_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
39236c6e 2908
39037602 2909 if (vm_swap_get(c_seg, f_offset, io_size) != KERN_SUCCESS) {
3e170ce0 2910 PAGE_REPLACEMENT_DISALLOWED(TRUE);
39236c6e 2911
39037602 2912 c_seg->c_store.c_swap_handle = f_offset;
39236c6e 2913
39037602
A
2914 kernel_memory_depopulate(compressor_map, addr, io_size, KMA_COMPRESSOR);
2915
2916 c_seg_swapin_requeue(c_seg, FALSE, TRUE, age_on_swapin_q);
3e170ce0
A
2917 } else {
2918 c_seg->c_store.c_buffer = (int32_t*) addr;
fe8ab488 2919#if ENCRYPTED_SWAP
3e170ce0 2920 vm_swap_decrypt(c_seg);
fe8ab488 2921#endif /* ENCRYPTED_SWAP */
39236c6e
A
2922
2923#if CHECKSUM_THE_SWAP
3e170ce0
A
2924 if (c_seg->cseg_swap_size != io_size)
2925 panic("swapin size doesn't match swapout size");
39236c6e 2926
3e170ce0
A
2927 if (c_seg->cseg_hash != hash_string((char*) c_seg->c_store.c_buffer, (int)io_size)) {
2928 panic("c_seg_swapin - Swap hash mismatch\n");
2929 }
39236c6e
A
2930#endif /* CHECKSUM_THE_SWAP */
2931
3e170ce0 2932 PAGE_REPLACEMENT_DISALLOWED(TRUE);
39236c6e 2933
39037602
A
2934 c_seg_swapin_requeue(c_seg, TRUE, force_minor_compaction == TRUE ? FALSE : TRUE, age_on_swapin_q);
2935
2936 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2937
3e170ce0 2938 if (force_minor_compaction == TRUE) {
39037602
A
2939 if (c_seg_minor_compaction_and_unlock(c_seg, FALSE)) {
2940 /*
2941 * Drop the rwlock_count so that the thread priority
2942 * is returned back to where it is supposed to be.
2943 */
2944 clear_thread_rwlock_boost();
2945 return (1);
2946 }
2947
3e170ce0 2948 lck_mtx_lock_spin_always(&c_seg->c_lock);
3e170ce0 2949 }
3e170ce0
A
2950 }
2951 C_SEG_WAKEUP_DONE(c_seg);
ecc0ceb4
A
2952
2953 /*
2954 * Drop the rwlock_count so that the thread priority
2955 * is returned back to where it is supposed to be.
2956 */
2957 clear_thread_rwlock_boost();
39037602
A
2958
2959 return (0);
3e170ce0
A
2960}
2961
2962
2963static void
2964c_segment_sv_hash_drop_ref(int hash_indx)
2965{
2966 struct c_sv_hash_entry o_sv_he, n_sv_he;
2967
2968 while (1) {
2969
2970 o_sv_he.he_record = c_segment_sv_hash_table[hash_indx].he_record;
2971
2972 n_sv_he.he_ref = o_sv_he.he_ref - 1;
2973 n_sv_he.he_data = o_sv_he.he_data;
2974
2975 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_indx].he_record) == TRUE) {
2976 if (n_sv_he.he_ref == 0)
2977 OSAddAtomic(-1, &c_segment_svp_in_hash);
2978 break;
2979 }
2980 }
2981}
2982
2983
2984static int
2985c_segment_sv_hash_insert(uint32_t data)
2986{
2987 int hash_sindx;
2988 int misses;
2989 struct c_sv_hash_entry o_sv_he, n_sv_he;
2990 boolean_t got_ref = FALSE;
2991
2992 if (data == 0)
2993 OSAddAtomic(1, &c_segment_svp_zero_compressions);
2994 else
2995 OSAddAtomic(1, &c_segment_svp_nonzero_compressions);
2996
2997 hash_sindx = data & C_SV_HASH_MASK;
2998
2999 for (misses = 0; misses < C_SV_HASH_MAX_MISS; misses++)
3000 {
3001 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
3002
3003 while (o_sv_he.he_data == data || o_sv_he.he_ref == 0) {
3004 n_sv_he.he_ref = o_sv_he.he_ref + 1;
3005 n_sv_he.he_data = data;
3006
3007 if (OSCompareAndSwap64((UInt64)o_sv_he.he_record, (UInt64)n_sv_he.he_record, (UInt64 *) &c_segment_sv_hash_table[hash_sindx].he_record) == TRUE) {
3008 if (n_sv_he.he_ref == 1)
3009 OSAddAtomic(1, &c_segment_svp_in_hash);
3010 got_ref = TRUE;
3011 break;
39236c6e 3012 }
3e170ce0 3013 o_sv_he.he_record = c_segment_sv_hash_table[hash_sindx].he_record;
39236c6e 3014 }
3e170ce0
A
3015 if (got_ref == TRUE)
3016 break;
3017 hash_sindx++;
3018
3019 if (hash_sindx == C_SV_HASH_SIZE)
3020 hash_sindx = 0;
39236c6e 3021 }
3e170ce0
A
3022 if (got_ref == FALSE)
3023 return(-1);
39236c6e 3024
3e170ce0
A
3025 return (hash_sindx);
3026}
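/*
 * Illustrative sketch (not part of the original source): a page that is one
 * repeating 32-bit value is not stored in a c_segment at all.  The value is
 * published in c_segment_sv_hash_table via the lock-free insert above and the
 * slot mapping simply names the hash bucket, roughly:
 *
 *	int hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src);
 *	if (hash_index != -1) {
 *		slot_ptr->s_cindx = hash_index;
 *		slot_ptr->s_cseg  = C_SV_CSEG_ID;
 *	}
 *
 * On decompression the C_SV_CSEG_ID marker routes the lookup back to the
 * hash table instead of a segment (see vm_compressor_get below).
 */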
3027
3028
3029#if RECORD_THE_COMPRESSED_DATA
3030
3031static void
3032c_compressed_record_data(char *src, int c_size)
3033{
3034 if ((c_compressed_record_cptr + c_size + 4) >= c_compressed_record_ebuf)
3035 panic("c_compressed_record_cptr >= c_compressed_record_ebuf");
3036
3037 *(int *)((void *)c_compressed_record_cptr) = c_size;
3038
3039 c_compressed_record_cptr += 4;
3040
3041 memcpy(c_compressed_record_cptr, src, c_size);
3042 c_compressed_record_cptr += c_size;
39236c6e 3043}
3e170ce0 3044#endif
39236c6e
A
3045
3046
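/*
 * c_compress_page: descriptive note (not in the original source).
 * Compresses one page at 'src' into the caller's current filling segment and
 * records where it landed in *slot_ptr (segment number + 1 and slot index).
 * Returns 0 on success; returns 1 only when no segment could be allocated,
 * i.e. the compressor pool has hit its limits.  Incompressible pages are
 * stored raw, and pages that are a single repeating 32-bit value are diverted
 * to the single-value hash (see the sketch above).
 */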
3047static int
3048c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf)
3049{
3050 int c_size;
04b8595b 3051 int c_rounded_size = 0;
39236c6e
A
3052 int max_csize;
3053 c_slot_t cs;
3054 c_segment_t c_seg;
3055
3056 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0);
3057retry:
3058 if ((c_seg = c_seg_allocate(current_chead)) == NULL)
3059 return (1);
3060 /*
3061 * returns with c_seg lock held
3e170ce0
A
3062 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
3063 * c_nextslot has been allocated and
3064 * c_store.c_buffer populated
39236c6e 3065 */
3e170ce0
A
3066 assert(c_seg->c_state == C_IS_FILLING);
3067
39236c6e
A
3068 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot);
3069
3070 cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr);
fe8ab488
A
3071 assert(slot_ptr == (c_slot_mapping_t)C_SLOT_UNPACK_PTR(cs));
3072
39236c6e
A
3073 cs->c_offset = c_seg->c_nextoffset;
3074
3075 max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset);
3076
3077 if (max_csize > PAGE_SIZE)
3078 max_csize = PAGE_SIZE;
3079
39236c6e
A
3080#if CHECKSUM_THE_DATA
3081 cs->c_hash_data = hash_string(src, PAGE_SIZE);
3082#endif
39236c6e 3083
39037602
A
3084 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
3085 } else {
3e170ce0 3086 c_size = WKdm_compress_new((const WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
fe8ab488 3087 (WK_word *)(uintptr_t)scratch_buf, max_csize - 4);
39037602 3088 }
39236c6e
A
3089 assert(c_size <= (max_csize - 4) && c_size >= -1);
3090
3091 if (c_size == -1) {
3092
3093 if (max_csize < PAGE_SIZE) {
3094 c_current_seg_filled(c_seg, current_chead);
3e170ce0 3095 assert(*current_chead == NULL);
39236c6e 3096
3e170ce0 3097 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e 3098
3e170ce0 3099 PAGE_REPLACEMENT_DISALLOWED(FALSE);
39236c6e
A
3100 goto retry;
3101 }
3102 c_size = PAGE_SIZE;
3103
3104 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3e170ce0
A
3105
3106 OSAddAtomic(1, &c_segment_noncompressible_pages);
3107
3108 } else if (c_size == 0) {
3109 int hash_index;
3110
3111 /*
3112 * special case - this is a page completely full of a single 32 bit value
3113 */
3114 hash_index = c_segment_sv_hash_insert(*(uint32_t *)(uintptr_t)src);
3115
3116 if (hash_index != -1) {
3117 slot_ptr->s_cindx = hash_index;
3118 slot_ptr->s_cseg = C_SV_CSEG_ID;
3119
3120 OSAddAtomic(1, &c_segment_svp_hash_succeeded);
3121#if RECORD_THE_COMPRESSED_DATA
3122 c_compressed_record_data(src, 4);
3123#endif
3124 goto sv_compression;
3125 }
3126 c_size = 4;
3127
3128 memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size);
3129
3130 OSAddAtomic(1, &c_segment_svp_hash_failed);
39236c6e 3131 }
3e170ce0
A
3132
3133#if RECORD_THE_COMPRESSED_DATA
3134 c_compressed_record_data((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3135#endif
3136
39236c6e
A
3137#if CHECKSUM_THE_COMPRESSED_DATA
3138 cs->c_hash_compressed_data = hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size);
3139#endif
3140 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3141
3142 PACK_C_SIZE(cs, c_size);
3143 c_seg->c_bytes_used += c_rounded_size;
3144 c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
3145
3146 slot_ptr->s_cindx = c_seg->c_nextslot++;
3147 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
3148 slot_ptr->s_cseg = c_seg->c_mysegno + 1;
3149
3e170ce0
A
3150sv_compression:
3151 if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX_INDEX) {
39236c6e 3152 c_current_seg_filled(c_seg, current_chead);
3e170ce0
A
3153 assert(*current_chead == NULL);
3154 }
3155 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e
A
3156
3157 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3158
3e170ce0
A
3159#if RECORD_THE_COMPRESSED_DATA
3160 if ((c_compressed_record_cptr - c_compressed_record_sbuf) >= C_SEG_ALLOCSIZE) {
3161 c_compressed_record_write(c_compressed_record_sbuf, (int)(c_compressed_record_cptr - c_compressed_record_sbuf));
3162 c_compressed_record_cptr = c_compressed_record_sbuf;
3163 }
3164#endif
3165 if (c_size) {
3166 OSAddAtomic64(c_size, &c_segment_compressed_bytes);
3167 OSAddAtomic64(c_rounded_size, &compressor_bytes_used);
3168 }
39236c6e 3169 OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes);
39236c6e
A
3170
3171 OSAddAtomic(1, &c_segment_pages_compressed);
3172 OSAddAtomic(1, &sample_period_compression_count);
3173
3174 KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0);
3175
39236c6e
A
3176 return (0);
3177}
3178
39037602
A
3179static inline void sv_decompress(int32_t *ddst, int32_t pattern) {
3180#if __x86_64__
3181 memset_word(ddst, pattern, PAGE_SIZE / sizeof(int32_t));
3182#else
3183 size_t i;
3184
3185 /* Unroll the pattern fill loop 4x to encourage the
3186 * compiler to emit NEON stores, cf.
3187 * <rdar://problem/25839866> Loop autovectorization
3188 * anomalies.
3189 * We use separate loops for each PAGE_SIZE
3190 * to allow the autovectorizer to engage, as PAGE_SIZE
3191 * is currently not a constant.
3192 */
3193
3194 if (PAGE_SIZE == 4096) {
3195 for (i = 0; i < (4096U / sizeof(int32_t)); i += 4) {
3196 *ddst++ = pattern;
3197 *ddst++ = pattern;
3198 *ddst++ = pattern;
3199 *ddst++ = pattern;
3200 }
3201 } else {
3202 assert(PAGE_SIZE == 16384);
3203 for (i = 0; i < (int)(16384U / sizeof(int32_t)); i += 4) {
3204 *ddst++ = pattern;
3205 *ddst++ = pattern;
3206 *ddst++ = pattern;
3207 *ddst++ = pattern;
3208 }
3209 }
3210#endif
3211}
39236c6e
A
3212
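/*
 * c_decompress_page: descriptive note (not in the original source).
 * 'flags' may include C_KEEP (leave the compressed copy in place),
 * C_DONT_BLOCK (return -2 rather than blocking on a busy or swapped-out
 * segment) and C_KDP (debugger context: never block or take locks that are
 * already held).  As read from the code below it returns 0 on success, 1 if
 * the segment first had to be swapped in, -1 if the segment's data was lost
 * (C_ON_BAD_Q), and -2 if the request would have had to block.  When dst is
 * NULL the call just frees the compressed copy without decompressing it.
 */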
3213static int
3214c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot)
3215{
3216 c_slot_t cs;
3217 c_segment_t c_seg;
39037602 3218 uint32_t c_segno;
39236c6e
A
3219 int c_indx;
3220 int c_rounded_size;
3221 uint32_t c_size;
3222 int retval = 0;
39236c6e
A
3223 boolean_t need_unlock = TRUE;
3224 boolean_t consider_defragmenting = FALSE;
3e170ce0
A
3225 boolean_t kdp_mode = FALSE;
3226
39037602 3227 if (__improbable(flags & C_KDP)) {
3e170ce0
A
3228 if (not_in_kdp) {
3229 panic("C_KDP passed to decompress page from outside of debugger context");
3230 }
3231
3232 assert((flags & C_KEEP) == C_KEEP);
3233 assert((flags & C_DONT_BLOCK) == C_DONT_BLOCK);
3234
3235 if ((flags & (C_DONT_BLOCK | C_KEEP)) != (C_DONT_BLOCK | C_KEEP)) {
3236 return (-2);
3237 }
3238
3239 kdp_mode = TRUE;
39037602 3240 *zeroslot = 0;
3e170ce0 3241 }
39236c6e
A
3242
3243ReTry:
39037602 3244 if (__probable(!kdp_mode)) {
3e170ce0
A
3245 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3246 } else {
3247 if (kdp_lck_rw_lock_is_acquired_exclusive(&c_master_lock)) {
3248 return (-2);
3249 }
3250 }
fe8ab488 3251
39236c6e 3252#if HIBERNATION
fe8ab488
A
3253 /*
3254 * if hibernation is enabled, it indicates (via a call
3255 * to 'vm_decompressor_lock') that no further
3256 * decompressions are allowed once it reaches
3257 * the point of flushing all of the currently dirty
3258 * anonymous memory through the compressor and out
3259 * to disk... in this state we allow freeing of compressed
3260 * pages and must honor the C_DONT_BLOCK case
3261 */
3262 if (dst && decompressions_blocked == TRUE) {
3263 if (flags & C_DONT_BLOCK) {
3264
39037602 3265 if (__probable(!kdp_mode)) {
3e170ce0
A
3266 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3267 }
fe8ab488
A
3268
3269 *zeroslot = 0;
3270 return (-2);
39236c6e 3271 }
fe8ab488
A
3272 /*
3273 * it's safe to atomically assert and block behind the
3274 * lock held in shared mode because "decompressions_blocked" is
3275 * only set and cleared and the thread_wakeup done when the lock
3276 * is held exclusively
3277 */
3278 assert_wait((event_t)&decompressions_blocked, THREAD_UNINT);
3279
3280 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3281
3282 thread_block(THREAD_CONTINUE_NULL);
3283
3284 goto ReTry;
39236c6e
A
3285 }
3286#endif
39236c6e 3287 /* s_cseg is actually "segno+1" */
39037602
A
3288 c_segno = slot_ptr->s_cseg - 1;
3289
3290 if (__improbable(c_segno >= c_segments_available))
3291 panic("c_decompress_page: c_segno %d >= c_segments_available %d, slot_ptr(%p), slot_data(%x)",
3292 c_segno, c_segments_available, slot_ptr, *(int *)((void *)slot_ptr));
3293
3294 if (__improbable(c_segments[c_segno].c_segno < c_segments_available))
3295 panic("c_decompress_page: c_segno %d is free, slot_ptr(%p), slot_data(%x)",
3296 c_segno, slot_ptr, *(int *)((void *)slot_ptr));
39236c6e 3297
39037602
A
3298 c_seg = c_segments[c_segno].c_seg;
3299
3300 if (__probable(!kdp_mode)) {
3e170ce0
A
3301 lck_mtx_lock_spin_always(&c_seg->c_lock);
3302 } else {
3303 if (kdp_lck_mtx_lock_spin_is_acquired(&c_seg->c_lock)) {
3304 return (-2);
3305 }
3306 }
39236c6e 3307
3e170ce0 3308 assert(c_seg->c_state != C_IS_EMPTY && c_seg->c_state != C_IS_FREE);
39236c6e 3309
39037602
A
3310 if (dst == NULL && c_seg->c_busy_swapping) {
3311 assert(c_seg->c_busy);
3312
3313 goto bypass_busy_check;
3314 }
3e170ce0
A
3315 if (flags & C_DONT_BLOCK) {
3316 if (c_seg->c_busy || (C_SEG_IS_ONDISK(c_seg) && dst)) {
39236c6e
A
3317 *zeroslot = 0;
3318
3e170ce0 3319 retval = -2;
39236c6e
A
3320 goto done;
3321 }
3322 }
3323 if (c_seg->c_busy) {
3324
3325 PAGE_REPLACEMENT_DISALLOWED(FALSE);
fe8ab488 3326
39236c6e
A
3327 c_seg_wait_on_busy(c_seg);
3328
3329 goto ReTry;
3330 }
39037602
A
3331bypass_busy_check:
3332
39236c6e
A
3333 c_indx = slot_ptr->s_cindx;
3334
39037602
A
3335 if (__improbable(c_indx >= c_seg->c_nextslot))
3336 panic("c_decompress_page: c_indx %d >= c_nextslot %d, c_seg(%p), slot_ptr(%p), slot_data(%x)",
3337 c_indx, c_seg->c_nextslot, c_seg, slot_ptr, *(int *)((void *)slot_ptr));
3338
39236c6e
A
3339 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
3340
3341 c_size = UNPACK_C_SIZE(cs);
3342
39037602
A
3343 if (__improbable(c_size == 0))
3344 panic("c_decompress_page: c_size == 0, c_seg(%p), slot_ptr(%p), slot_data(%x)",
3345 c_seg, slot_ptr, *(int *)((void *)slot_ptr));
3346
39236c6e
A
3347 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3348
3349 if (dst) {
3350 uint32_t age_of_cseg;
3351 clock_sec_t cur_ts_sec;
3352 clock_nsec_t cur_ts_nsec;
3353
3e170ce0
A
3354 if (C_SEG_IS_ONDISK(c_seg)) {
3355 assert(kdp_mode == FALSE);
39037602
A
3356 retval = c_seg_swapin(c_seg, FALSE, TRUE);
3357 assert(retval == 0);
3e170ce0
A
3358
3359 retval = 1;
39236c6e 3360 }
3e170ce0
A
3361 if (c_seg->c_state == C_ON_BAD_Q) {
3362 assert(c_seg->c_store.c_buffer == NULL);
3363
3364 retval = -1;
39236c6e
A
3365 goto c_seg_invalid_data;
3366 }
3367#if CHECKSUM_THE_COMPRESSED_DATA
3368 if (cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size))
39037602 3369 panic("compressed data doesn't match original hash: 0x%x, seg: %p, offset: %d, c_size: %d", cs->c_hash_compressed_data, c_seg, cs->c_offset, c_size);
39236c6e
A
3370#endif
3371 if (c_rounded_size == PAGE_SIZE) {
3372 /*
3373 * page wasn't compressible... just copy it out
3374 */
3375 memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE);
3e170ce0
A
3376 } else if (c_size == 4) {
3377 int32_t data;
3378 int32_t *dptr;
3379
3380 /*
3381 * page was populated with a single value
3382 * that didn't fit into our fast hash
3383 * so we packed it in as a single non-compressed value
3384 * that we need to populate the page with
3385 */
3386 dptr = (int32_t *)(uintptr_t)dst;
3387 data = *(int32_t *)(&c_seg->c_store.c_buffer[cs->c_offset]);
39037602 3388 sv_decompress(dptr, data);
39236c6e
A
3389 } else {
3390 uint32_t my_cpu_no;
3391 char *scratch_buf;
3392
39037602 3393 if (__probable(!kdp_mode)) {
3e170ce0
A
3394 /*
3395 * we're behind the c_seg lock held in spin mode
3396 * which means pre-emption is disabled... therefore
3397 * the following sequence is atomic and safe
3398 */
3399 my_cpu_no = cpu_number();
39236c6e 3400
3e170ce0 3401 assert(my_cpu_no < compressor_cpus);
39236c6e 3402
39037602 3403 scratch_buf = &compressor_scratch_bufs[my_cpu_no * vm_compressor_get_decode_scratch_size()];
3e170ce0
A
3404 } else {
3405 scratch_buf = kdp_compressor_scratch_buf;
3406 }
39037602
A
3407
3408 if (vm_compressor_algorithm() != VM_COMPRESSOR_DEFAULT_CODEC) {
3409 } else {
39236c6e
A
3410 WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset],
3411 (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size);
39037602 3412 }
39236c6e
A
3413 }
3414
3415#if CHECKSUM_THE_DATA
3416 if (cs->c_hash_data != hash_string(dst, PAGE_SIZE))
39037602
A
3417 panic("decompressed data doesn't match original cs: %p, hash: %d, offset: %d, c_size: %d", cs, cs->c_hash_data, cs->c_offset, c_size);
3418
39236c6e 3419#endif
3e170ce0 3420 if (c_seg->c_swappedin_ts == 0 && !kdp_mode) {
39236c6e
A
3421
3422 clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);
3423
3424 age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts;
39236c6e
A
3425 if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE)
3426 OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]);
3427 else
3428 OSAddAtomic(1, &overage_decompressions_during_sample_period);
3429
3430 OSAddAtomic(1, &sample_period_decompression_count);
3431 }
39236c6e
A
3432 }
3433c_seg_invalid_data:
3434
39236c6e
A
3435 if (flags & C_KEEP) {
3436 *zeroslot = 0;
3437 goto done;
3438 }
3e170ce0 3439 assert(kdp_mode == FALSE);
39037602 3440
39236c6e
A
3441 c_seg->c_bytes_unused += c_rounded_size;
3442 c_seg->c_bytes_used -= c_rounded_size;
3443 PACK_C_SIZE(cs, 0);
3444
3445 if (c_indx < c_seg->c_firstemptyslot)
3446 c_seg->c_firstemptyslot = c_indx;
3447
3448 OSAddAtomic(-1, &c_segment_pages_compressed);
3449
3e170ce0 3450 if (c_seg->c_state != C_ON_BAD_Q && !(C_SEG_IS_ONDISK(c_seg))) {
39236c6e 3451 /*
3e170ce0 3452 * C_SEG_IS_ONDISK == TRUE can occur when we're doing a
39236c6e
A
3453 * free of a compressed page (i.e. dst == NULL)
3454 */
3455 OSAddAtomic64(-c_rounded_size, &compressor_bytes_used);
3456 }
39037602
A
3457 if (c_seg->c_busy_swapping) {
3458 /*
3459 * bypass case for c_busy_swapping...
3460 * let the swapin/swapout paths deal with putting
3461 * the c_seg on the minor compaction queue if needed
3462 */
3463 assert(c_seg->c_busy);
3464 goto done;
3465 }
3466 assert(!c_seg->c_busy);
3467
3e170ce0 3468 if (c_seg->c_state != C_IS_FILLING) {
39236c6e 3469 if (c_seg->c_bytes_used == 0) {
3e170ce0 3470 if ( !(C_SEG_IS_ONDISK(c_seg))) {
8a3053a0
A
3471 int pages_populated;
3472
3473 pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE;
3474 c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(0);
3475
3476 if (pages_populated) {
3e170ce0
A
3477
3478 assert(c_seg->c_state != C_ON_BAD_Q);
8a3053a0
A
3479 assert(c_seg->c_store.c_buffer != NULL);
3480
fe8ab488 3481 C_SEG_BUSY(c_seg);
8a3053a0
A
3482 lck_mtx_unlock_always(&c_seg->c_lock);
3483
39037602 3484 kernel_memory_depopulate(compressor_map, (vm_offset_t) c_seg->c_store.c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR);
8a3053a0
A
3485
3486 lck_mtx_lock_spin_always(&c_seg->c_lock);
3487 C_SEG_WAKEUP_DONE(c_seg);
3488 }
39037602
A
3489 if (!c_seg->c_on_minorcompact_q && c_seg->c_state != C_ON_SWAPOUT_Q)
3490 c_seg_need_delayed_compaction(c_seg, FALSE);
3491 } else {
3492 if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q) {
8a3053a0 3493
39037602
A
3494 c_seg_move_to_sparse_list(c_seg);
3495 consider_defragmenting = TRUE;
3496 }
3497 }
39236c6e
A
3498 } else if (c_seg->c_on_minorcompact_q) {
3499
3e170ce0
A
3500 assert(c_seg->c_state != C_ON_BAD_Q);
3501
3502 if (C_SEG_SHOULD_MINORCOMPACT(c_seg)) {
39236c6e
A
3503 c_seg_try_minor_compaction_and_unlock(c_seg);
3504 need_unlock = FALSE;
3505 }
3e170ce0 3506 } else if ( !(C_SEG_IS_ONDISK(c_seg))) {
39236c6e 3507
3e170ce0 3508 if (c_seg->c_state != C_ON_BAD_Q && c_seg->c_state != C_ON_SWAPOUT_Q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
39037602 3509 c_seg_need_delayed_compaction(c_seg, FALSE);
39236c6e 3510 }
3e170ce0 3511 } else if (c_seg->c_state != C_ON_SWAPPEDOUTSPARSE_Q && C_SEG_ONDISK_IS_SPARSE(c_seg)) {
39236c6e
A
3512
3513 c_seg_move_to_sparse_list(c_seg);
3514 consider_defragmenting = TRUE;
3515 }
3516 }
3517done:
39037602 3518 if (__improbable(kdp_mode)) {
3e170ce0
A
3519 return retval;
3520 }
3521
39236c6e
A
3522 if (need_unlock == TRUE)
3523 lck_mtx_unlock_always(&c_seg->c_lock);
3524
3525 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3526
3527 if (consider_defragmenting == TRUE)
3528 vm_swap_consider_defragmenting();
fe8ab488
A
3529
3530
39236c6e
A
3531 return (retval);
3532}
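
/*
 * Illustration only (a sketch, not part of the build): the "single value"
 * paths above store a page whose 4096 bytes are one repeated 32-bit word
 * either as an entry in the c_sv hash (slot s_cseg == C_SV_CSEG_ID) or as a
 * lone 4-byte slot in a c_seg (c_size == 4); decompression then reduces to
 * refilling the page with that word, roughly as below.  The helper name is
 * hypothetical.
 */
#if 0	/* example sketch, never compiled */
static void
example_sv_fill(int32_t *page, int32_t value)
{
	int	i;

	/* replicate the 32-bit pattern across the whole page */
	for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
		page[i] = value;
}
#endif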
3533
3534
3535int
3536vm_compressor_get(ppnum_t pn, int *slot, int flags)
3537{
3e170ce0 3538 c_slot_mapping_t slot_ptr;
39236c6e
A
3539 char *dst;
3540 int zeroslot = 1;
3541 int retval;
3542
3543#if __x86_64__
3544 dst = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
3545#else
3546#error "unsupported architecture"
3547#endif
3e170ce0
A
3548 slot_ptr = (c_slot_mapping_t)slot;
3549
3550 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
3551 int32_t data;
3552 int32_t *dptr;
3553
3554 /*
3555 * page was populated with a single value
3556 * that found a home in our hash table
3557 * grab that value from the hash and
3558 * repopulate the page with it
3559 */
3560 dptr = (int32_t *)(uintptr_t)dst;
3561 data = c_segment_sv_hash_table[slot_ptr->s_cindx].he_data;
3562#if __x86_64__
3563 memset_word(dptr, data, PAGE_SIZE / sizeof(int32_t));
3564#else
3565 {
3566 int i;
3567
3568 for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++)
3569 *dptr++ = data;
3570 }
3571#endif
3e170ce0 3572 if ( !(flags & C_KEEP)) {
743345f9
A
3573 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
3574
3e170ce0
A
3575 OSAddAtomic(-1, &c_segment_pages_compressed);
3576 *slot = 0;
3577 }
3578 if (data)
3579 OSAddAtomic(1, &c_segment_svp_nonzero_decompressions);
3580 else
3581 OSAddAtomic(1, &c_segment_svp_zero_decompressions);
3582
3583 return (0);
3584 }
3585
3586 retval = c_decompress_page(dst, slot_ptr, flags, &zeroslot);
39236c6e
A
3587
3588 /*
3589 * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP)
3e170ce0 3590 * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be TRUE
39236c6e
A
3591 */
3592 if (zeroslot) {
39236c6e
A
3593 *slot = 0;
3594 }
3595 /*
3596 * returns 0 if we successfully decompressed a page from a segment already in memory
3597 * returns 1 if we had to first swap in the segment, before successfully decompressing the page
3598 * returns -1 if we encountered an error swapping in the segment - decompression failed
3e170ce0 3599 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'C_SEG_IS_ONDISK' to be true
39236c6e
A
3600 */
3601 return (retval);
3602}
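
/*
 * Illustration only (a sketch, not part of the build): one way a caller
 * might act on the return codes documented above.  The function name and
 * flow are hypothetical; the real callers live in the fault and pageout
 * paths.
 */
#if 0	/* example sketch, never compiled */
static kern_return_t
example_fetch_compressed_page(ppnum_t pn, int *slot)
{
	int	retval;

	/* first attempt: don't block and don't trigger a swap-in */
	retval = vm_compressor_get(pn, slot, C_DONT_BLOCK);

	if (retval == -2) {
		/* segment was busy or on disk... retry, allowing blocking and swap-in */
		retval = vm_compressor_get(pn, slot, 0);
	}
	if (retval == -1)
		return (KERN_FAILURE);	/* swap-in of the segment failed */

	/* 0 == decompressed in place, 1 == segment had to be swapped in first */
	return (KERN_SUCCESS);
}
#endif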
3603
3604
fe8ab488
A
3605int
3606vm_compressor_free(int *slot, int flags)
39236c6e 3607{
3e170ce0 3608 c_slot_mapping_t slot_ptr;
39236c6e 3609 int zeroslot = 1;
fe8ab488 3610 int retval;
39236c6e 3611
fe8ab488 3612 assert(flags == 0 || flags == C_DONT_BLOCK);
39236c6e 3613
3e170ce0
A
3614 slot_ptr = (c_slot_mapping_t)slot;
3615
3616 if (slot_ptr->s_cseg == C_SV_CSEG_ID) {
3617
3618 c_segment_sv_hash_drop_ref(slot_ptr->s_cindx);
3619 OSAddAtomic(-1, &c_segment_pages_compressed);
3620
3621 *slot = 0;
3622 return (0);
3623 }
3624 retval = c_decompress_page(NULL, slot_ptr, flags, &zeroslot);
fe8ab488
A
3625 /*
3626 * returns 0 if we successfully freed the specified compressed page
3627 * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' set
3628 */
3629
3630 if (retval == 0)
3631 *slot = 0;
3e170ce0
A
3632 else
3633 assert(retval == -2);
fe8ab488
A
3634
3635 return (retval);
39236c6e
A
3636}
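
/*
 * Illustration only: with C_DONT_BLOCK, vm_compressor_free() can return -2
 * when the segment is busy; a hypothetical non-blocking caller would leave
 * the slot intact and retry later, for example:
 *
 *	if (vm_compressor_free(slot, C_DONT_BLOCK) == -2)
 *		defer_slot_for_retry(slot);	(hypothetical helper)
 */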
3637
3638
3639int
3640vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf)
3641{
3642 char *src;
3643 int retval;
3644
39236c6e
A
3645#if __x86_64__
3646 src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
3647#else
3648#error "unsupported architecture"
3649#endif
3650 retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf);
3651
3652 return (retval);
3653}
fe8ab488
A
3654
3655void
3656vm_compressor_transfer(
3657 int *dst_slot_p,
3658 int *src_slot_p)
3659{
3660 c_slot_mapping_t dst_slot, src_slot;
3661 c_segment_t c_seg;
3662 int c_indx;
3663 c_slot_t cs;
3664
fe8ab488
A
3665 src_slot = (c_slot_mapping_t) src_slot_p;
3666
3e170ce0
A
3667 if (src_slot->s_cseg == C_SV_CSEG_ID) {
3668 *dst_slot_p = *src_slot_p;
3669 *src_slot_p = 0;
3670 return;
3671 }
3672 dst_slot = (c_slot_mapping_t) dst_slot_p;
fe8ab488
A
3673Retry:
3674 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3675 /* get segment for src_slot */
3676 c_seg = c_segments[src_slot->s_cseg -1].c_seg;
3677 /* lock segment */
3678 lck_mtx_lock_spin_always(&c_seg->c_lock);
3679 /* wait if it's busy */
3e170ce0 3680 if (c_seg->c_busy && !c_seg->c_busy_swapping) {
fe8ab488
A
3681 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3682 c_seg_wait_on_busy(c_seg);
3683 goto Retry;
3684 }
3685 /* find the c_slot */
3686 c_indx = src_slot->s_cindx;
3687 cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx);
3688 /* point the c_slot back to dst_slot instead of src_slot */
3689 cs->c_packed_ptr = C_SLOT_PACK_PTR(dst_slot);
3690 /* transfer */
3691 *dst_slot_p = *src_slot_p;
3692 *src_slot_p = 0;
3693 lck_mtx_unlock_always(&c_seg->c_lock);
3694 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3695}
3e170ce0
A
3696
3697#if CONFIG_FREEZE
3698
3699int freezer_finished_filling = 0;
3700
3701void
3702vm_compressor_finished_filling(
3703 void **current_chead)
3704{
3705 c_segment_t c_seg;
3706
3707 if ((c_seg = *(c_segment_t *)current_chead) == NULL)
3708 return;
3709
3710 assert(c_seg->c_state == C_IS_FILLING);
3711
3712 lck_mtx_lock_spin_always(&c_seg->c_lock);
3713
3714 c_current_seg_filled(c_seg, (c_segment_t *)current_chead);
3715
3716 lck_mtx_unlock_always(&c_seg->c_lock);
3717
3718 freezer_finished_filling++;
3719}
3720
3721
3722/*
3723 * This routine is used to transfer the compressed chunks from
3724 * the c_seg/cindx pointed to by slot_p into a new c_seg headed
3725 * by the current_chead and a new cindx within that c_seg.
3726 *
3727 * Currently, this routine is only used by the "freezer backed by
3728 * compressor with swap" mode to create a series of c_segs that
3729 * only contain compressed data belonging to one task. So, we
3730 * move a task's previously compressed data into a set of new
3731 * c_segs which will also hold the task's yet to be compressed data.
3732 */
3733
3734kern_return_t
3735vm_compressor_relocate(
3736 void **current_chead,
3737 int *slot_p)
3738{
3739 c_slot_mapping_t slot_ptr;
3740 c_slot_mapping_t src_slot;
3741 uint32_t c_rounded_size;
3742 uint32_t c_size;
3743 uint16_t dst_slot;
3744 c_slot_t c_dst;
3745 c_slot_t c_src;
3746 int c_indx;
3747 c_segment_t c_seg_dst = NULL;
3748 c_segment_t c_seg_src = NULL;
3749 kern_return_t kr = KERN_SUCCESS;
3750
3751
3752 src_slot = (c_slot_mapping_t) slot_p;
3753
3754 if (src_slot->s_cseg == C_SV_CSEG_ID) {
3755 /*
3756 * no need to relocate... this is a page full of a single
3757 * value which is hashed to a single entry not contained
3758 * in a c_segment_t
3759 */
3760 return (kr);
3761 }
3762
3763Relookup_dst:
3764 c_seg_dst = c_seg_allocate((c_segment_t *)current_chead);
3765 /*
3766 * returns with c_seg lock held
3767 * and PAGE_REPLACEMENT_DISALLOWED(TRUE)...
3768 * c_nextslot has been allocated and
3769 * c_store.c_buffer populated
3770 */
3771 if (c_seg_dst == NULL) {
3772 /*
3773 * Out of compression segments?
3774 */
3775 kr = KERN_RESOURCE_SHORTAGE;
3776 goto out;
3777 }
3778
3779 assert(c_seg_dst->c_busy == 0);
3780
3781 C_SEG_BUSY(c_seg_dst);
3782
3783 dst_slot = c_seg_dst->c_nextslot;
3784
3785 lck_mtx_unlock_always(&c_seg_dst->c_lock);
3786
3787Relookup_src:
3788 c_seg_src = c_segments[src_slot->s_cseg - 1].c_seg;
3789
3790 assert(c_seg_dst != c_seg_src);
3791
3792 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
3793
3794 if (C_SEG_IS_ONDISK(c_seg_src)) {
3795
3796 /*
3797 * A "thaw" can mark a process as eligible for
3798 * another freeze cycle without bringing any of
3799 * its swapped out c_segs back from disk (because
3800 * that is done on-demand).
3801 *
3802 * If the src c_seg we find for our pre-compressed
3803 * data is already on-disk, then we are dealing
3804 * with an app's data that is already packed and
3805 * swapped out. Don't do anything.
3806 */
3807
3808 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3809
3810 lck_mtx_unlock_always(&c_seg_src->c_lock);
3811
3812 c_seg_src = NULL;
3813
3814 goto out;
3815 }
3816
3817 if (c_seg_src->c_busy) {
3818
3819 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3820 c_seg_wait_on_busy(c_seg_src);
3821
3822 c_seg_src = NULL;
3823
3824 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3825
3826 goto Relookup_src;
3827 }
3828
3829 C_SEG_BUSY(c_seg_src);
3830
3831 lck_mtx_unlock_always(&c_seg_src->c_lock);
3832
3833 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3834
3835 /* find the c_slot */
3836 c_indx = src_slot->s_cindx;
3837
3838 c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, c_indx);
3839
3840 c_size = UNPACK_C_SIZE(c_src);
3841
3842 assert(c_size);
3843
3844 if (c_size > (uint32_t)(C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)c_seg_dst->c_nextoffset))) {
3845 /*
3846 * This segment is full. We need a new one.
3847 */
3848
3849 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3850
3851 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
3852 C_SEG_WAKEUP_DONE(c_seg_src);
3853 lck_mtx_unlock_always(&c_seg_src->c_lock);
3854
3855 c_seg_src = NULL;
3856
3857 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
3858
3859 assert(c_seg_dst->c_busy);
3860 assert(c_seg_dst->c_state == C_IS_FILLING);
3861 assert(!c_seg_dst->c_on_minorcompact_q);
3862
3863 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
3864 assert(*current_chead == NULL);
3865
3866 C_SEG_WAKEUP_DONE(c_seg_dst);
3867
3868 lck_mtx_unlock_always(&c_seg_dst->c_lock);
3869
3870 c_seg_dst = NULL;
3871
3872 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3873
3874 goto Relookup_dst;
3875 }
3876
3877 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot);
3878
3879 memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size);
3880
3881 c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK;
3882
39037602 3883 cslot_copy(c_dst, c_src);
3e170ce0
A
3884 c_dst->c_offset = c_seg_dst->c_nextoffset;
3885
3886 if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot)
3887 c_seg_dst->c_firstemptyslot++;
3888
3889 c_seg_dst->c_nextslot++;
3890 c_seg_dst->c_bytes_used += c_rounded_size;
3891 c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size);
3892
3893
3894 PACK_C_SIZE(c_src, 0);
3895
3896 c_seg_src->c_bytes_used -= c_rounded_size;
3897 c_seg_src->c_bytes_unused += c_rounded_size;
3898
3899 if (c_indx < c_seg_src->c_firstemptyslot) {
3900 c_seg_src->c_firstemptyslot = c_indx;
3901 }
3902
3903 c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot);
3904
3905 PAGE_REPLACEMENT_ALLOWED(TRUE);
3906 slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst);
3907 /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */
3908 slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1;
3909 slot_ptr->s_cindx = dst_slot;
3910
3911 PAGE_REPLACEMENT_ALLOWED(FALSE);
3912
3913out:
3914 if (c_seg_src) {
3915
3916 lck_mtx_lock_spin_always(&c_seg_src->c_lock);
3917
3918 C_SEG_WAKEUP_DONE(c_seg_src);
3919
3920 if (c_seg_src->c_bytes_used == 0 && c_seg_src->c_state != C_IS_FILLING) {
3921 if (!c_seg_src->c_on_minorcompact_q)
39037602 3922 c_seg_need_delayed_compaction(c_seg_src, FALSE);
3e170ce0
A
3923 }
3924
3925 lck_mtx_unlock_always(&c_seg_src->c_lock);
3926 }
3927
3928 if (c_seg_dst) {
3929
3930 PAGE_REPLACEMENT_DISALLOWED(TRUE);
3931
3932 lck_mtx_lock_spin_always(&c_seg_dst->c_lock);
3933
3934 if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX_INDEX) {
3935 /*
3936 * Nearing or exceeded maximum slot and offset capacity.
3937 */
3938 assert(c_seg_dst->c_busy);
3939 assert(c_seg_dst->c_state == C_IS_FILLING);
3940 assert(!c_seg_dst->c_on_minorcompact_q);
3941
3942 c_current_seg_filled(c_seg_dst, (c_segment_t *)current_chead);
3943 assert(*current_chead == NULL);
3944 }
3945
3946 C_SEG_WAKEUP_DONE(c_seg_dst);
3947
3948 lck_mtx_unlock_always(&c_seg_dst->c_lock);
3949
3950 c_seg_dst = NULL;
3951
3952 PAGE_REPLACEMENT_DISALLOWED(FALSE);
3953 }
3954
3955 return kr;
3956}
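
/*
 * Illustration only (a sketch, not part of the build): how the freezer-style
 * caller described above might drive vm_compressor_relocate(), packing a
 * task's already-compressed slots into freshly allocated c_segs and then
 * sealing the final, partially filled one.  The slot array, its length and
 * the function name are hypothetical.
 */
#if 0	/* example sketch, never compiled */
static kern_return_t
example_relocate_task_slots(int *slots, unsigned int nslots)
{
	void		*current_chead = NULL;
	kern_return_t	kr = KERN_SUCCESS;
	unsigned int	i;

	for (i = 0; i < nslots; i++) {
		if (slots[i] == 0)
			continue;	/* empty slot... nothing to move */

		kr = vm_compressor_relocate(&current_chead, &slots[i]);

		if (kr != KERN_SUCCESS)
			break;		/* e.g. KERN_RESOURCE_SHORTAGE */
	}
	/* close out the partially filled destination c_seg, if any */
	vm_compressor_finished_filling(&current_chead);

	return (kr);
}
#endif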
3957#endif /* CONFIG_FREEZE */