/*
 * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* This module implements a hybrid/adaptive compression scheme, using WKdm where
 * profitable and, currently, an LZ4 variant elsewhere.
 * (Created 2016, Derek Kumar)
 */
34 #include <vm/vm_compressor_algorithms.h>
35 #include <vm/vm_compressor.h>
37 #define MZV_MAGIC (17185)
38 #if defined(__arm64__)
39 #include <arm/proc_reg.h>
42 #define LZ4_SCRATCH_ALIGN (64)
43 #define WKC_SCRATCH_ALIGN (64)
45 #define LZ4_SCRATCH_ALIGN (64)
46 #define WKC_SCRATCH_ALIGN (64)
48 #define memcpy_T_NT memcpy
49 #define memcpy_NT_T memcpy
52 uint8_t lz4state
[lz4_encode_scratch_size
]__attribute((aligned(LZ4_SCRATCH_ALIGN
)));
53 uint8_t wkscratch
[0] __attribute((aligned(WKC_SCRATCH_ALIGN
))); // TODO
54 } compressor_encode_scratch_t
;
57 uint8_t lz4decodestate
[lz4_encode_scratch_size
]__attribute((aligned(64)));
58 uint8_t wkdecompscratch
[0] __attribute((aligned(64)));
59 } compressor_decode_scratch_t
;
62 uint16_t lz4_selection_run
;
63 uint16_t lz4_run_length
;
64 uint16_t lz4_preselects
;
65 uint32_t lz4_total_preselects
;
66 uint16_t lz4_failure_skips
;
67 uint32_t lz4_total_failure_skips
;
68 uint16_t lz4_failure_run_length
;
69 uint16_t lz4_total_unprofitables
;
70 uint32_t lz4_total_negatives
;
71 uint32_t lz4_total_failures
;
74 compressor_tuneables_t vmctune
= {
75 .lz4_threshold
= 2048,
76 .wkdm_reeval_threshold
= 1536,
77 .lz4_max_failure_skips
= 0,
78 .lz4_max_failure_run_length
= ~0U,
79 .lz4_max_preselects
= 0,
80 .lz4_run_preselection_threshold
= ~0U,
81 .lz4_run_continue_bytes
= 0,
82 .lz4_profitable_bytes
= 0,
85 compressor_state_t vmcstate
= {
86 .lz4_selection_run
= 0,
89 .lz4_total_preselects
= 0,
90 .lz4_failure_skips
= 0,
91 .lz4_total_failure_skips
= 0,
92 .lz4_failure_run_length
= 0,
93 .lz4_total_unprofitables
= 0,
94 .lz4_total_negatives
= 0,
97 compressor_stats_t compressor_stats
;
99 enum compressor_preselect_t
{
105 vm_compressor_mode_t vm_compressor_current_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
107 boolean_t vm_compressor_force_sw_wkdm
= FALSE
;
109 boolean_t verbose
= FALSE
;
#define VMDBGSTAT (DEBUG)
/*
 * NOTE(review): the guard below tests VMDBGSTATS (with an 'S') while the
 * define above is VMDBGSTAT — the debug-timing stats therefore never compile
 * in. Preserved byte-for-byte pending confirmation that the typo is not
 * intentional (enabling it requires the wks_*abstime fields on DEBUG builds).
 */
#if VMDBGSTATS
#define VM_COMPRESSOR_STAT_DBG(x...)	\
	do {				\
		(x);			\
	} while (0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...)	\
	do {				\
	} while (0)
#endif

#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS
#define VM_COMPRESSOR_STAT(x...)	\
	do {				\
		(x);			\
	} while (0)
//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...)	\
	do {				\
		(x);			\
	} while (0)
#else
#define VM_COMPRESSOR_STAT(x...)	\
	do {				\
	} while (0)
#define VM_DECOMPRESSOR_STAT(x...)	\
	do {				\
	} while (0)
#endif
143 static inline enum compressor_preselect_t
144 compressor_preselect(void)
146 if (vmcstate
.lz4_failure_skips
>= vmctune
.lz4_max_failure_skips
) {
147 vmcstate
.lz4_failure_skips
= 0;
148 vmcstate
.lz4_failure_run_length
= 0;
151 if (vmcstate
.lz4_failure_run_length
>= vmctune
.lz4_max_failure_run_length
) {
152 vmcstate
.lz4_failure_skips
++;
153 vmcstate
.lz4_total_failure_skips
++;
157 if (vmcstate
.lz4_preselects
>= vmctune
.lz4_max_preselects
) {
158 vmcstate
.lz4_preselects
= 0;
162 if (vmcstate
.lz4_run_length
>= vmctune
.lz4_run_preselection_threshold
) {
163 vmcstate
.lz4_preselects
++;
164 vmcstate
.lz4_total_preselects
++;
171 compressor_selector_update(int lz4sz
, int didwk
, int wksz
)
173 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compressions
++);
176 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compressed_bytes
+= PAGE_SIZE
);
177 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compression_failures
++);
178 vmcstate
.lz4_failure_run_length
++;
179 VM_COMPRESSOR_STAT(vmcstate
.lz4_total_failures
++);
180 vmcstate
.lz4_run_length
= 0;
182 vmcstate
.lz4_failure_run_length
= 0;
184 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compressed_bytes
+= lz4sz
);
186 if (lz4sz
<= vmctune
.wkdm_reeval_threshold
) {
187 vmcstate
.lz4_run_length
= 0;
190 vmcstate
.lz4_run_length
++;
195 if (__probable(wksz
> lz4sz
)) {
196 uint32_t lz4delta
= wksz
- lz4sz
;
197 VM_COMPRESSOR_STAT(compressor_stats
.lz4_wk_compression_delta
+= lz4delta
);
198 if (lz4delta
>= vmctune
.lz4_run_continue_bytes
) {
199 vmcstate
.lz4_run_length
++;
200 } else if (lz4delta
<= vmctune
.lz4_profitable_bytes
) {
201 vmcstate
.lz4_failure_run_length
++;
202 VM_COMPRESSOR_STAT(vmcstate
.lz4_total_unprofitables
++);
203 vmcstate
.lz4_run_length
= 0;
205 vmcstate
.lz4_run_length
= 0;
208 VM_COMPRESSOR_STAT(compressor_stats
.lz4_wk_compression_negative_delta
+= (lz4sz
- wksz
));
209 vmcstate
.lz4_failure_run_length
++;
210 VM_COMPRESSOR_STAT(vmcstate
.lz4_total_negatives
++);
211 vmcstate
.lz4_run_length
= 0;
219 WKdm_hv(uint32_t *wkbuf
)
221 #if DEVELOPMENT || DEBUG
222 uint32_t *inw
= (uint32_t *) wkbuf
;
223 if (*inw
!= MZV_MAGIC
) {
224 if ((*inw
| *(inw
+ 1) | *(inw
+ 2)) & 0xFFFF0000) {
225 panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x\n", wkbuf
, *inw
, *(inw
+ 1), *(inw
+ 2));
228 #else /* DEVELOPMENT || DEBUG */
233 //todo fix clang diagnostic
234 #pragma clang diagnostic push
235 #pragma clang diagnostic ignored "-Wincompatible-pointer-types"
237 #if defined(__arm64__)
241 WKdmD(WK_word
* src_buf
, WK_word
* dest_buf
, WK_word
* scratch
, unsigned int bytes
,
242 __unused
uint32_t *pop_count
)
244 #if defined(__arm64__)
247 #if defined(__arm64__)
248 if (PAGE_SIZE
== 4096) {
249 WKdm_decompress_4k(src_buf
, dest_buf
, scratch
, bytes
);
251 __unused
uint64_t wdsstart
;
253 VM_COMPRESSOR_STAT_DBG(wdsstart
= mach_absolute_time());
254 WKdm_decompress_16k(src_buf
, dest_buf
, scratch
, bytes
);
256 VM_COMPRESSOR_STAT_DBG(compressor_stats
.wks_dabstime
+= mach_absolute_time() - wdsstart
);
257 VM_COMPRESSOR_STAT(compressor_stats
.wks_decompressions
++);
259 #else /* !defined arm64 */
260 WKdm_decompress_new(src_buf
, dest_buf
, scratch
, bytes
);
#if DEVELOPMENT || DEBUG
/* Debug instrumentation knobs for the WKdm compress path. */
int precompy, wkswhw;
#endif
269 WKdmC(WK_word
* src_buf
, WK_word
* dest_buf
, WK_word
* scratch
,
270 boolean_t
*incomp_copy
, unsigned int limit
, __unused
uint32_t *pop_count
)
274 #if defined(__arm64__)
275 if (PAGE_SIZE
== 4096) {
276 wkcval
= WKdm_compress_4k(src_buf
, dest_buf
, scratch
, limit
);
278 __unused
uint64_t wcswstart
;
280 VM_COMPRESSOR_STAT_DBG(wcswstart
= mach_absolute_time());
282 int wkswsz
= WKdm_compress_16k(src_buf
, dest_buf
, scratch
, limit
);
284 VM_COMPRESSOR_STAT_DBG(compressor_stats
.wks_cabstime
+= mach_absolute_time() - wcswstart
);
285 VM_COMPRESSOR_STAT(compressor_stats
.wks_compressions
++);
289 wkcval
= WKdm_compress_new(src_buf
, dest_buf
, scratch
, limit
);
296 metacompressor(const uint8_t *in
, uint8_t *cdst
, int32_t outbufsz
, uint16_t *codec
,
297 void *cscratchin
, boolean_t
*incomp_copy
, uint32_t *pop_count_p
)
300 int dowk
= FALSE
, dolz4
= FALSE
, skiplz4
= FALSE
;
301 int insize
= PAGE_SIZE
;
302 compressor_encode_scratch_t
*cscratch
= cscratchin
;
303 /* Not all paths lead to an inline population count. */
304 uint32_t pop_count
= C_SLOT_NO_POPCOUNT
;
306 if (vm_compressor_current_codec
== CMODE_WK
) {
308 } else if (vm_compressor_current_codec
== CMODE_LZ4
) {
310 } else if (vm_compressor_current_codec
== CMODE_HYB
) {
311 enum compressor_preselect_t presel
= compressor_preselect();
312 if (presel
== CPRESELLZ4
) {
315 } else if (presel
== CSKIPLZ4
) {
319 assert(presel
== CPRESELWK
);
326 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressions
++);
327 sz
= WKdmC(in
, cdst
, &cscratch
->wkscratch
[0], incomp_copy
, outbufsz
, &pop_count
);
330 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_total
+= PAGE_SIZE
);
331 VM_COMPRESSOR_STAT(compressor_stats
.wk_compression_failures
++);
333 if (vm_compressor_current_codec
== CMODE_HYB
) {
337 } else if (sz
== 0) {
338 VM_COMPRESSOR_STAT(compressor_stats
.wk_sv_compressions
++);
339 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_total
+= 4);
341 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_total
+= sz
);
345 if (vm_compressor_current_codec
== CMODE_HYB
) {
346 if (((sz
== -1) || (sz
>= vmctune
.lz4_threshold
)) && (skiplz4
== FALSE
)) {
349 #if DEVELOPMENT || DEBUG
350 int wkc
= (sz
== -1) ? PAGE_SIZE
: sz
;
352 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressions_exclusive
++);
353 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_exclusive
+= wkc
);
367 sz
= (int) lz4raw_encode_buffer(cdst
, outbufsz
, in
, insize
, &cscratch
->lz4state
[0]);
369 compressor_selector_update(sz
, dowk
, wksz
);
376 assert(pop_count_p
!= NULL
);
377 *pop_count_p
= pop_count
;
382 metadecompressor(const uint8_t *source
, uint8_t *dest
, uint32_t csize
,
383 uint16_t ccodec
, void *compressor_dscratchin
, uint32_t *pop_count_p
)
385 int dolz4
= (ccodec
== CCLZ4
);
387 compressor_decode_scratch_t
*compressor_dscratch
= compressor_dscratchin
;
388 /* Not all paths lead to an inline population count. */
389 uint32_t pop_count
= C_SLOT_NO_POPCOUNT
;
393 rval
= (int)lz4raw_decode_buffer(dest
, PAGE_SIZE
, source
, csize
, &compressor_dscratch
->lz4decodestate
[0]);
394 VM_DECOMPRESSOR_STAT(compressor_stats
.lz4_decompressions
+= 1);
395 VM_DECOMPRESSOR_STAT(compressor_stats
.lz4_decompressed_bytes
+= csize
);
396 #if DEVELOPMENT || DEBUG
397 uint32_t *d32
= dest
;
399 assertf(rval
== PAGE_SIZE
, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
400 rval
, *d32
, *(d32
+ 1), *(d32
+ 2));
401 success
= (rval
== PAGE_SIZE
);
403 assert(ccodec
== CCWK
);
405 success
= WKdmD(source
, dest
, &compressor_dscratch
->wkdecompscratch
[0], csize
, &pop_count
);
407 VM_DECOMPRESSOR_STAT(compressor_stats
.wk_decompressions
+= 1);
408 VM_DECOMPRESSOR_STAT(compressor_stats
.wk_decompressed_bytes
+= csize
);
411 assert(pop_count_p
!= NULL
);
412 *pop_count_p
= pop_count
;
415 #pragma clang diagnostic pop
418 vm_compressor_get_encode_scratch_size(void)
420 if (vm_compressor_current_codec
!= VM_COMPRESSOR_DEFAULT_CODEC
) {
421 return MAX(sizeof(compressor_encode_scratch_t
), WKdm_SCRATCH_BUF_SIZE_INTERNAL
);
423 return WKdm_SCRATCH_BUF_SIZE_INTERNAL
;
428 vm_compressor_get_decode_scratch_size(void)
430 if (vm_compressor_current_codec
!= VM_COMPRESSOR_DEFAULT_CODEC
) {
431 return MAX(sizeof(compressor_decode_scratch_t
), WKdm_SCRATCH_BUF_SIZE_INTERNAL
);
433 return WKdm_SCRATCH_BUF_SIZE_INTERNAL
;
439 vm_compressor_algorithm(void)
441 return vm_compressor_current_codec
;
445 vm_compressor_algorithm_init(void)
447 vm_compressor_mode_t new_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
449 #if defined(__arm64__)
450 new_codec
= CMODE_HYB
;
452 if (PAGE_SIZE
== 16384) {
453 vmctune
.lz4_threshold
= 12288;
457 PE_parse_boot_argn("vm_compressor_codec", &new_codec
, sizeof(new_codec
));
458 assertf(((new_codec
== VM_COMPRESSOR_DEFAULT_CODEC
) || (new_codec
== CMODE_WK
) ||
459 (new_codec
== CMODE_LZ4
) || (new_codec
== CMODE_HYB
)),
460 "Invalid VM compression codec: %u", new_codec
);
462 #if defined(__arm__) || defined(__arm64__)
464 if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc
, sizeof(tmpc
))) {
465 new_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
466 } else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc
, sizeof(tmpc
))) {
467 new_codec
= CMODE_HYB
;
470 vm_compressor_current_codec
= new_codec
;
471 #endif /* arm/arm64 */